feat: 1.0

2026-05-30 01:38:27 +08:00 · 2026-05-30 01:38:27 +08:00 · e1330451a5
commit e1330451a5
parent 2b543f5e37
2277 changed files with 22298 additions and 232612 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,13 +1,79 @@
-.git/
+# Git
-.idea/
+.git
-.vscode/
+.gitignore
 node_modules/
 *.log
 .env
 .env.local
 .env.*.local
-# Exclude all target/ content, then selectively re-include release binaries
+# IDE
 .idea
 .vscode
 *.swp
 *.swo
 *~
 # Rust build artifacts
 target/
-!target/release/
+**/target/
-!target/x86_64-unknown-linux-gnu/release/
+
 # Node.js
 node_modules/
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
 pnpm-debug.log*
 bun.lockb
 # Build output
 dist/
 build/
 # Environment and secrets
 .env
 .env.*
 !.env.example
 # Docker
 docker/
 docker-compose*.yml
 .dockerignore
 Dockerfile*
 *.Dockerfile
 # Documentation
 *.md
 LICENSE
 doc/
 # Test and CI
 tests/
 __tests__/
 *.test.*
 *.spec.*
 .github/
 .gitlab/
 .circleci/
 # OS files
 .DS_Store
 Thumbs.db
 desktop.ini
 # Logs
 logs/
 *.log
 # Temporary files
 tmp/
 temp/
 .tmp/
 # Certificates (use secrets in production)
 cert/
 # Data directories
 data/
 # Agent configs
 .agent/
 .agents/
 .claude/
 CLAUDE.md
 AGENTS.md
--- a/.env.example
+++ b/.env.example
@ -1,136 +0,0 @@
 # =============================================================================
 # Required - 程序启动必须配置
 # =============================================================================
 # 数据库连接
 APP_DATABASE_URL=postgresql://user:password@localhost:5432/dbname
 APP_DATABASE_SCHEMA_SEARCH_PATH=public
 # Redis（支持多节点，逗号分隔）
 APP_REDIS_URL=redis://localhost:6379
 # APP_REDIS_URLS=redis://localhost:6379,redis://localhost:6378
 # AI 服务
 APP_AI_BASIC_URL=https://api.openai.com/v1
 APP_AI_API_KEY=sk-xxxxx
 # Embedding + 向量检索
 APP_EMBED_MODEL_BASE_URL=https://api.openai.com/v1
 APP_EMBED_MODEL_API_KEY=sk-xxxxx
 APP_EMBED_MODEL_NAME=text-embedding-3-small
 APP_EMBED_MODEL_DIMENSIONS=1536
 APP_QDRANT_URL=http://localhost:6333
 # APP_QDRANT_API_KEY=
 # SMTP 邮件
 APP_SMTP_HOST=smtp.example.com
 APP_SMTP_PORT=587
 APP_SMTP_USERNAME=noreply@example.com
 APP_SMTP_PASSWORD=xxxxx
 APP_SMTP_FROM=noreply@example.com
 APP_SMTP_TLS=true
 APP_SMTP_TIMEOUT=30
 # 文件存储
 APP_AVATAR_PATH=/data/avatars
 # Git 仓库存储根目录
 APP_REPOS_ROOT=/data/repos
 # =============================================================================
 # Domain / URL（可选，有默认值）
 # =============================================================================
 APP_DOMAIN_URL=http://127.0.0.1
 # APP_STATIC_DOMAIN=
 # APP_MEDIA_DOMAIN=
 # APP_GIT_HTTP_DOMAIN=
 # =============================================================================
 # Database Pool（可选，有默认值）
 # =============================================================================
 # APP_DATABASE_MAX_CONNECTIONS=10
 # APP_DATABASE_MIN_CONNECTIONS=2
 # APP_DATABASE_IDLE_TIMEOUT=60000    (milliseconds, default: 60s)
 # APP_DATABASE_MAX_LIFETIME=300000   (milliseconds, default: 300s)
 # APP_DATABASE_CONNECTION_TIMEOUT=5000 (milliseconds, default: 5s)
 # APP_DATABASE_REPLICAS=
 # APP_DATABASE_HEALTH_CHECK_INTERVAL=30
 # APP_DATABASE_RETRY_ATTEMPTS=3
 # APP_DATABASE_RETRY_DELAY=5
 # =============================================================================
 # Redis Pool（可选，有默认值）
 # =============================================================================
 # APP_REDIS_POOL_SIZE=10
 # APP_REDIS_CONNECT_TIMEOUT=5
 # APP_REDIS_ACQUIRE_TIMEOUT=5
 # =============================================================================
 # SSH（可选，有默认值）
 # =============================================================================
 # APP_SSH_DOMAIN=
 # APP_SSH_PORT=22
 # APP_SSH_SERVER_PRIVATE_KEY=
 # APP_SSH_SERVER_PUBLIC_KEY=
 # =============================================================================
 # Logging（可选，有默认值）
 # =============================================================================
 # APP_LOG_LEVEL=info
 # APP_LOG_FORMAT=json
 # APP_LOG_FILE_ENABLED=false
 # APP_LOG_FILE_PATH=./logs
 # APP_LOG_FILE_ROTATION=daily
 # APP_LOG_FILE_MAX_FILES=7
 # APP_LOG_FILE_MAX_SIZE=104857600
 # OpenTelemetry（可选，默认关闭）
 # APP_OTEL_ENABLED=false
 # APP_OTEL_ENDPOINT=http://localhost:5080/api/default/v1/traces
 # APP_OTEL_SERVICE_NAME=
 # APP_OTEL_SERVICE_VERSION=
 # APP_OTEL_AUTHORIZATION=
 # APP_OTEL_ORGANIZATION=
 # =============================================================================
 # NATS / Hook Pool（可选，有默认值）
 # =============================================================================
 # HOOK_POOL_MAX_CONCURRENT=（CPU 核数）
 # HOOK_POOL_CPU_THRESHOLD=80.0
 # HOOK_POOL_REDIS_LIST_PREFIX={hook}
 # HOOK_POOL_REDIS_LOG_CHANNEL=hook:logs
 # HOOK_POOL_REDIS_BLOCK_TIMEOUT=5
 # HOOK_POOL_REDIS_MAX_RETRIES=3
 # HOOK_POOL_WORKER_ID=（随机 UUID）
 # =============================================================================
 # Frontend (Vite) — 前端运行环境变量
 # =============================================================================
 # API 基础 URL（为空时使用 Vite dev 代理 /api -> localhost:8080）
 # VITE_API_BASE_URL=http://localhost:8080
 # 前端 WebSocket 连接地址（开发模式通过 Vite 代理）
 VITE_WS_URL=ws://localhost:5080
 # API URL（前端 API 调用，通过 Vite 代理时可为空）
 VITE_API_URL=
 # WebSocket 连接模式: "raw-ws" | "socketio"
 VITE_WS_MODE=raw-ws
 # =============================================================================
 # Frontend: Grafana Faro (RUM) — 前端性能监控（可选）
 # =============================================================================
 # VITE_FARO_ENABLED=false
 # VITE_FARO_URL=https://faro.example.com/collect
 # VITE_FARO_API_KEY=
 # VITE_FARO_APP_NAME=GitDataAIWeb
 # VITE_FARO_APP_ENV=production
 # VITE_FARO_APP_VERSION=0.0.1
--- a/.gitignore
+++ b/.gitignore
@ -1,29 +1,62 @@
 # Rust build artifacts
 /target
-node_modules
+**/target/
-.claude
+
-.zed
+# Rust IDE and tooling
-.vscode
+.idea/
-.idea
+.vscode/
 *.swp
 *.swo
 *~
 # Environment files
 .env
 .env.local
-dist
+.env.*.local
-deploy/secrets.yaml
+.env.production
-.codex
+
-.qwen
+# OS files
-.opencode
+.DS_Store
-.omc
+Thumbs.db
-AGENT.md
+desktop.ini
-ARCHITECTURE.md
+
-.agents
+# Node.js
 .agents.md
 .next
 node_modules/
-coverage/
+npm-debug.log*
-.pnpm-store/
+yarn-debug.log*
-pnpm-lock.yaml
+yarn-error.log*
-package-lock.json
+pnpm-debug.log*
-yarn.lock
+
-.gemini
+# Build output
-.omg
+dist/
-/.sqry
+build/
-deploy/.server.yaml
+
 # Logs
 logs/
 *.log
 # Data and certificates
 data/
 cert/
 # Docker
 docker-compose.override.yml
 .docker/
 # Agent configs
 .claude/
 .codex/
 .agent/
 .agents/
 CLAUDE.md
 AGENTS.md
 migrate.sh
 # Temporary files
 tmp/
 temp/
 .tmp/
 # Backup files
 *.bak
 *.backup
 *~
--- a/.mcp.json
+++ b/.mcp.json
@ -1,11 +0,0 @@
 {
  "mcpServers": {
    "shadcn": {
      "command": "npx",
      "args": [
        "shadcn@latest",
        "mcp"
      ]
    }
  }
 }
--- a/.prettierignore
+++ b/.prettierignore
@ -1,7 +0,0 @@
 node_modules/
 coverage/
 .pnpm-store/
 pnpm-lock.yaml
 package-lock.json
 pnpm-lock.yaml
 yarn.lock
--- a/.prettierrc
+++ b/.prettierrc
@ -1,11 +0,0 @@
 {
  "endOfLine": "lf",
  "semi": false,
  "singleQuote": false,
  "tabWidth": 2,
  "trailingComma": "es5",
  "printWidth": 80,
  "plugins": ["prettier-plugin-tailwindcss"],
  "tailwindStylesheet": "src/index.css",
  "tailwindFunctions": ["cn", "cva"]
 }
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,192 +1,5 @@
 [workspace]
 members = [
    #    "libs/frontend",
    "libs/models",
    "libs/session",
    "libs/git",
    "libs/email",
    "libs/queue",
    "libs/room",
    "libs/config",
    "libs/service",
    "libs/db",
    "libs/api",
    "libs/transport",
    "libs/observability",
    "libs/avatar",
    "libs/agent",
    "libs/migrate",
    "libs/fctool",
    "libs/gingress-proxy",
    "apps/migrate",
    "apps/app",
    "apps/git-hook",
    "apps/gitserver",
    "apps/email",
    "apps/static",
    "apps/metrics",
    "apps/gingress",
 ]
 resolver = "3"
 [workspace.dependencies]
 models = { path = "libs/models" }
 session = { path = "libs/session" }
 git = { path = "libs/git" }
 email = { path = "libs/email" }
 queue = { path = "libs/queue" }
 room = { path = "libs/room" }
 config = { path = "libs/config" }
 service = { path = "libs/service" }
 db = { path = "libs/db" }
 api = { path = "libs/api" }
 agent = { path = "libs/agent" }
 observability = { path = "libs/observability" }
 avatar = { path = "libs/avatar" }
 migrate = { path = "libs/migrate" }
 fctool = { path = "libs/fctool" }
 transport = { path = "libs/transport" }
 metrics-aggregator = { path = "apps/metrics" }
 gingress-proxy = { path = "libs/gingress-proxy" }
 gingress = { path = "apps/gingress" }
 sea-query = "1.0.0-rc.33"
 actix-web = "4.13.0"
 actix-files = "0.6.10"
 actix-cors = "0.7.1"
 actix-session = "0.11.0"
 actix-ws = "0.4.0"
 actix-multipart = "0.7.2"
 actix-analytics = "1.2.1"
 actix-jwt-session = "1.0.7"
 actix-csrf = "0.8.0"
 metrics = "0.24.5"
 actix-rt = "2.11.0"
 actix = "0.13"
 async-stream = "0.3"
 actix-service = "2.0.3"
 actix-utils = "3.0.1"
 redis = "1.1.0"
 anyhow = "1.0.102"
 derive_more = "2.1.1"
 blake3 = "1.8.3"
 argon2 = "0.5.3"
 thiserror = "2.0.18"
 password-hash = "0.6.0"
 awc = "3.8.2"
 bstr = "1.12.1"
 captcha-rs = "0.5.0"
 deadpool-redis = "0.23.0"
 deadpool = "0.13.0"
 dotenv = "0.15.0"
 env_logger = "0.11.10"
 brotli = "7.0"
 flate2 = "1.1.9"
 git2 = "0.20.4"
 slog = "2.8.2"
 git2-ext = "1.0.0"
 git2-hooks = "0.7.0"
 futures = "0.3.32"
 futures-util = "0.3.32"
 globset = "0.4.18"
 hex = "0.4.3"
 lettre = { version = "0.11.19", default-features = false, features = ["tokio1-rustls-tls", "smtp-transport", "builder", "pool"] }
 mime = "0.3.17"
 mime_guess2 = "2.3.1"
 opentelemetry = "0.31.0"
 opentelemetry-otlp = { version = "0.31.0", features = ["http-proto", "trace"] }
 opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio"] }
 opentelemetry-http = "0.31.0"
 prost = "0.14.3"
 prost-build = "0.14.3"
 qdrant-client = "1.17.0"
 prost-types = "0.14.3"
 rand = "0.10.0"
 russh = { version = "0.60.2", default-features = false, features = ["ring", "rsa"] }
 hmac = { version = "0.13" }
 hkdf = "0.13.0"
 sha1_smol = "1.0.1"
 rsa = { version = "0.9.7", package = "rsa" }
 reqwest = { version = "0.13.2", default-features = false }
 dotenvy = "0.15.7"
 # aws-lc-sys requires NASM on Windows, so we use local filesystem storage instead of S3
 # aws-sdk-s3 = "1.127.0"
 sea-orm = "2.0.0-rc.37"
 sea-orm-migration = "2.0.0-rc.37"
 sha1 = "0.11"
 sha2 = "0.11"
 sysinfo = "0.39.1"
 ssh-key = "0.7.0-rc.9"
 tar = "0.4.45"
 zip = "8.3.1"
 tokenizer = "0.1.2"
 tiktoken-rs = "0.11.0"
 regex = "1.12.3"
 jsonwebtoken = "10.3.0"
 once_cell = "1.21.4"
 async-trait = "0.1.89"
 fs2 = "0.4.3"
 image = "0.25.10"
 tokio = "1.50.0"
 tokio-util = "0.7.18"
 tokio-stream = { version = "0.1.18", features = ["sync"] }
 url = "2.5.8"
 tower = "0.5"
 num_cpus = "1.17.0"
 ring = "0.17"
 rustls = { version = "0.23", default-features = false, features = ["ring", "std", "tls12"] }
 clap = "4.6.0"
 time = "0.3.47"
 chrono = "0.4.44"
 tracing = "0.1.44"
 tracing-subscriber = { version = "0.3.23", features = ["env-filter", "json", "tracing-log"] }
 tracing-opentelemetry = "0.32.1"
 tonic = "0.14.5"
 tonic-build = "0.14.5"
 uuid = "1.22.0"
 hostname = "0.4"
 utoipa = { version = "5.4.0", features = ["chrono", "uuid"] }
 rust_decimal = "1.40.0"
 walkdir = "2.5.0"
 calamine = "0.26"
 csv = "1.3"
 lopdf = "0.34"
 pulldown-cmark = "0.12"
 quick-xml = { version = "0.37", features = ["serialize"] }
 sqlparser = "0.55"
 lazy_static = "1.5"
 chacha20poly1305 = "0.10"
 md5 = "0.7"
 moka = "0.12.15"
 dashmap = "7.0.0-rc2"
 serde = "1.0.228"
 serde_json = "1.0.149"
 serde_yaml = "0.9.33"
 serde_bytes = "0.11.19"
 phf = "0.13.1"
 phf_codegen = "0.13.1"
 base64 = "0.22.1"
 base64ct = "1"
 p256 = { version = "0.13", features = ["ecdsa", "std"] }
 # http version varies per-crate (pingora needs 1.x, actix needs 0.2)
 hyper = "0.14"
 tempfile = "3"
 rig-core = { version = "0.36.0", default-features = false }
 tokio-tungstenite = { version = "0.29.0", features = [] }
 async-nats = { version = "0.48.0", features = [] }
 kube = { version = "3.1.0", features = ["runtime", "derive"] }
 k8s-openapi = { version = "0.27", features = ["v1_31"] }
 pingora = { version = "0.8", features = ["proxy"] }
 pingora-proxy = "0.8"
 pingora-load-balancing = "0.8"
 pingora-cache = "0.8"
 rustls-pemfile = "2"
 [workspace.package]
-version = "0.2.9"
+version = "1.0.0"
 edition = "2024"
 authors = []
 description = ""
@ -198,6 +11,31 @@ keywords = []
 categories = []
 documentation = ""
 [workspace]
 members = [
    "app/email",
    "app/gitdata",
    "app/gitpod",
    "app/gitsync",
    "lib/ai",
    "lib/api",
    "lib/cache",
    "lib/channel",
    "lib/config",
    "lib/db",
    "lib/email",
    "lib/git",
    "lib/issues",
    "lib/migrate",
    "lib/model",
    "lib/queue",
    "lib/service",
    "lib/session",
    "lib/storage",
    "lib/parsefile"
 , "lib/socketio"]
 resolver = "3"
 [workspace.lints.rust]
 unsafe_code = "warn"
@ -205,36 +43,105 @@ unsafe_code = "warn"
 unwrap_used = "warn"
 expect_used = "warn"
-[profile.dev]
+[workspace.dependencies]
-debug = 1
+ai = { path = "lib/ai" }
-incremental = true
+api = { path = "lib/api" }
-codegen-units = 256
+cache = { path = "lib/cache" }
 channel = { path = "lib/channel" }
 config = { path = "lib/config" }
 db = { path = "lib/db" }
 email = { path = "lib/email" }
 git = { path = "lib/git" }
 issues = { path = "lib/issues" }
 migrate = { path = "lib/migrate" }
 model = { path = "lib/model" }
 queue = { path = "lib/queue" }
 service = { path = "lib/service" }
 session = { path = "lib/session" }
 storage = { path = "lib/storage" }
 parsefile = { path = "lib/parsefile"}
 socketio = { path = "lib/socketio" }
-[profile.release]
+leptos = "0.8.19"
-lto = "thin"
+leptos_actix = "0.8.7"
-codegen-units = 1
+leptos_meta = "0.8.6"
-strip = true
+leptos_router = "0.8.13"
-opt-level = 3
+server_fn = { version = "0.8.10", features = ["actix"] }
 actix-http = "3.11"
 actix-ws = "0.4.0"
 urlencoding = "2.1"
 serde_urlencoded = "0.7"
-
+juniper = "0.17.1"
-[profile.dev.package.num-bigint-dig]
+ractor = "0.15.13"
-opt-level = 3
+ractor_cluster = "0.15.13"
-
+async-nats = "0.48.0"
-
+petgraph = "0.8.3"
-[package]
+async-openai = "0.40.0"
-name = "workspace"
+rig-core = { version = "0.36.0", default-features = false, features = ["derive"] }
-version.workspace = true
+schemars = "1.2.1"
-edition.workspace = true
+tokio-stream = "0.1.18"
-authors.workspace = true
+duct = "1.1.1"
-description.workspace = true
+lettre = "0.11.22"
-repository.workspace = true
+actix-web = "4"
-readme.workspace = true
+jsonwebtoken = { version = "10.4.0", features = ["rust_crypto"] }
-homepage.workspace = true
+futures-util = "0.3"
-license.workspace = true
+futures = "0.3.32"
-keywords.workspace = true
+moka = "0.12.15"
-categories.workspace = true
+tokio = "1.52.3"
-documentation.workspace = true
+redis = "1.2.1"
-
+serde_json = "1.0.149"
-[lib]
+indexmap = "2.14.0"
-path = "lib.rs"
+sea-orm-migration = "2.0.0-rc.38"
-crate-type = ["lib"]
+sea-orm = { version = "2.0.0-rc.38", features = ["sqlx-all","runtime-tokio","rust_decimal","uuid","chrono"]}
 async-trait = "0.1.89"
 aws-config = "1.8.16"
 aws-sdk-s3 = "1.132.0"
 rust_decimal = "1.42.0"
 utoipa = "5.5.0"
 dotenvy = "0.15.7"
 anyhow = "1.0.102"
 derive_more = "2.1.1"
 serde = "1.0.228"
 serde_yaml = "0.9.33"
 comrak = "0.38"
 sqlparser = "0.62.0"
 qdrant-client = "1.18.0"
 tiktoken-rs = "0.11.0"
 tracing-subscriber = "0.3.23"
 thiserror = "2.0.18"
 uuid = "1.23.1"
 git2 = "0.21.0"
 gix = { version = "0.83.0", features = ["max-performance-safe", "serde", "merge", "blame", "revision", "blob-diff", "worktree-stream", "worktree-archive", "mailmap"] }
 gix-archive = "0.32.0"
 gix-worktree-stream = "0.32.0"
 num_cpus = "1.17.0"
 tracing = "0.1.44"
 actix-service = "2.0.3"
 actix-rt = "2.11.0"
 actix-utils = "3.0.1"
 toasty = "0.6.1"
 chrono = "0.4.44"
 argon2 = "0.5.3"
 rand = "0.10.1"
 rand_core = { version = "0.10.1", features = ["getrandom"] }
 totp-rs = "5.7.1"
 url = "2.5.7"
 sha2 = "0.11.0"
 base64 = "0.22"
 tonic = "0.14.6"
 tonic-build = "0.14.6"
 prost = "0.14.3"
 tonic-prost = "0.14.6"
 dashmap = "6"
 sqlx = "0.9.0"
 russh = { version = "0.61.1", features = ["legacy-ed25519-pkcs8-parser"] }
 hex = "0.4"
 async-stream = "0.3"
 tokio-util = "0.7"
 password-hash = "0.6.1"
 deadpool-redis = { version = "0.23", features = ["cluster"] }
 reqwest = { version = "0.13", features = ["json", "rustls", "system-proxy"] }
 hmac = "0.13"
 mcpkit = "0.5"
 miette = "7"
--- a/README.md
+++ b/README.md
@ -1,21 +0,0 @@
 # React + TypeScript + Vite + shadcn/ui
 This is a template for a new Vite project with React, TypeScript, and shadcn/ui.
 ## Adding components
 To add components to your app, run the following command:
 ```bash
 npx shadcn@latest add button
 ```
 This will place the UI components in the `src/components` directory.
 ## Using components
 To use the components in your app, import them as follows:
 ```tsx
 import { Button } from "@/components/ui/button"
 ```
--- a/apps/app/Cargo.toml
+++ b/apps/app/Cargo.toml
@ -1,39 +0,0 @@
 [package]
 name = "app"
 version.workspace = true
 edition.workspace = true
 authors.workspace = true
 description.workspace = true
 repository.workspace = true
 readme.workspace = true
 homepage.workspace = true
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 documentation.workspace = true
 [dependencies]
 tokio = { workspace = true, features = ["full"] }
 uuid = { workspace = true }
 service = { workspace = true }
 observability = { workspace = true }
 room = { workspace = true }
 sha2 = { workspace = true }
 hkdf = { workspace = true }
 hmac = { workspace = true }
 api = { workspace = true }
 session = { workspace = true }
 config = { workspace = true }
 db = { workspace = true }
 migrate = { workspace = true }
 actix-web = { workspace = true }
 actix-cors = { workspace = true }
 futures = { workspace = true }
 tracing = { workspace = true }
 anyhow = { workspace = true }
 clap = { workspace = true }
 sea-orm = { workspace = true }
 serde_json = { workspace = true }
 chrono = { workspace = true }
 [lints]
 workspace = true
--- a/apps/app/src/args.rs
+++ b/apps/app/src/args.rs
@ -1,12 +0,0 @@
 use clap::Parser;
 // Command-line options accepted by the `app` binary.
 // NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
 // doc comments into `--help` text, which would change the program's output.
 #[derive(Parser, Debug)]
 #[command(name = "app", version)]
 pub struct ServerArgs {
    // `--bind` / `-b`: optional listen address; `None` when the flag is omitted.
    #[arg(short, long)]
    pub bind: Option<String>,
    // `--workers`: optional count; `None` when the flag is omitted.
    #[arg(long)]
    pub workers: Option<usize>,
 }
--- a/apps/app/src/logging.rs
+++ b/apps/app/src/logging.rs
@ -1,133 +0,0 @@
 //! Structured HTTP request logging middleware using tracing.
 //!
 //! Logs every incoming request with method, path, status code,
 //! response time, client IP, authenticated user ID, and trace_id.
 use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
 use futures::future::{LocalBoxFuture, Ready, ok};
 use session::SessionExt;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::time::Instant;
 use uuid::Uuid;
 /// Middleware factory that wraps a service in [`RequestLoggerMiddleware`].
 ///
 /// The middleware reports each completed request (other than `/health`) as a
 /// structured `http_request` tracing event.
 pub struct RequestLogger {
    /// Header name supplied by the caller; copied into every middleware instance.
    trace_id_header: String,
 }
 impl RequestLogger {
    /// Creates a logger that hands `trace_id_header` to the middleware it builds.
    pub fn new(trace_id_header: String) -> Self {
        RequestLogger { trace_id_header }
    }
 }
 /// Lets [`RequestLogger`] be installed with `App::wrap`.
 impl<S, B> Transform<S, ServiceRequest> for RequestLogger
 where
    B: 'static,
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
    S::Future: 'static,
 {
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type InitError = ();
    type Transform = RequestLoggerMiddleware<S>;
    type Future = Ready<Result<Self::Transform, Self::InitError>>;
    /// Wraps `service` in an `Arc` and pairs it with a copy of the configured
    /// header name. Construction cannot fail, so the future is immediately ready.
    fn new_transform(&self, service: S) -> Self::Future {
        let middleware = RequestLoggerMiddleware {
            trace_id_header: self.trace_id_header.clone(),
            service: Arc::new(service),
        };
        ok(middleware)
    }
 }
 /// Per-worker middleware instance created by `RequestLogger::new_transform`.
 pub struct RequestLoggerMiddleware<S> {
    /// Inner service, shared so it can be moved into the per-request future.
    service: Arc<S>,
    /// Header name copied from the owning `RequestLogger`.
    trace_id_header: String,
 }
 /// Hand-written so that cloning does not require `S: Clone` (a derive would
 /// add that bound); only the `Arc` handle and the header name are duplicated.
 impl<S> Clone for RequestLoggerMiddleware<S> {
    fn clone(&self) -> Self {
        let service = Arc::clone(&self.service);
        let trace_id_header = self.trace_id_header.clone();
        Self {
            service,
            trace_id_header,
        }
    }
 }
 impl<S, B> Service<ServiceRequest> for RequestLoggerMiddleware<S>
 where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
    S::Future: 'static,
    B: 'static,
 {
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
    fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.service.poll_ready(cx)
    }
    fn call(&self, req: ServiceRequest) -> Self::Future {
        let started = Instant::now();
        let trace_id_header = self.trace_id_header.clone();
        let method = req.method().to_string();
        let path = req.path().to_string();
        let query = req.query_string().to_string();
        let remote = req
            .connection_info()
            .realip_remote_addr()
            .map(|s| s.to_string())
            .unwrap_or_else(|| "unknown".to_string());
        let user_id: Option<Uuid> = req.get_session().user();
        let trace_id = Uuid::now_v7().to_string();
        let full_path = if query.is_empty() {
            path.clone()
        } else {
            format!("{}?{}", path, query)
        };
        let service = self.service.clone();
        Box::pin(async move {
            let res = service.call(req).await?;
            let elapsed = started.elapsed();
            let status = res.status();
            let status_code = status.as_u16();
            let is_health = path == "/health";
            if !is_health {
                let user_id_str = user_id
                    .map(|u: Uuid| u.to_string())
                    .unwrap_or_else(|| "-".to_string());
                let duration_ms = elapsed.as_millis() as u64;
                let log_args = (
                    method = %method,
                    path = %full_path,
                    status = status_code,
                    duration_ms = duration_ms,
                    remote = %remote,
                    user_id = %user_id_str,
                    trace_id = %trace_id,
                );
                match status_code {
                    200..=299 => {
                        tracing::info!(log_args, "http_request");
                    }
                    400..=499 => {
                        tracing::warn!(log_args, "http_request");
                    }
                    _ => {
                        tracing::error!(log_args, "http_request");
                    }
                }
            }
            Ok(res)
        })
    }
 }
--- a/apps/app/src/main.rs
+++ b/apps/app/src/main.rs
@ -1,350 +0,0 @@
 use actix_cors::Cors;
 use actix_web::cookie::time::Duration;
 use actix_web::dev::{Service, ServiceRequest, ServiceResponse};
 use actix_web::{App, HttpResponse, HttpServer, cookie::Key, web};
 use api::{robots, sidemap};
 use clap::Parser;
 use db::cache::AppCache;
 use db::database::AppDatabase;
 use futures::future::LocalBoxFuture;
 use observability::{
    HttpMetrics, HttpSnapshotGuard, MetricsMiddleware, TracingSpanMiddleware,
    init_tracing_subscriber, install_recorder, prometheus_handler, push::MetricsPusher,
    spawn_http_metrics_poller,
 };
 use sea_orm::ConnectionTrait;
 use service::AppService;
 use session::SessionMiddleware;
 use session::config::{PersistentSession, SessionLifecycle, TtlExtensionPolicy};
 use session::storage::RedisClusterSessionStore;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::time::Instant;
 mod args;
 use args::ServerArgs;
 use config::AppConfig;
 use migrate::{Migrator, MigratorTrait};
 /// Shared handles registered as actix-web application data and read by `health_check`.
 #[derive(Clone)]
 pub struct AppState {
    /// Database handle; `health_check` pings both its writer and reader connections.
    pub db: AppDatabase,
    /// Cache handle; `health_check` verifies that a connection can be obtained.
    pub cache: AppCache,
 }
 /// Middleware factory: logs each request except those on noisy paths
 /// (`/health`, `/metrics`, `/ws*`, `/assets*`).
 struct RequestLogger;
 impl<S, B> actix_web::dev::Transform<S, ServiceRequest> for RequestLogger
 where
    B: 'static,
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
 {
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type InitError = ();
    type Transform = RequestLoggerService<S>;
    type Future = futures::future::Ready<Result<Self::Transform, Self::InitError>>;
    /// Wraps `service`; construction cannot fail, so the future is already resolved.
    fn new_transform(&self, service: S) -> Self::Future {
        let wrapped = RequestLoggerService {
            _marker: std::marker::PhantomData,
            service,
        };
        futures::future::ok(wrapped)
    }
 }
 /// Service wrapper produced by `RequestLogger`; logs requests that pass through `service`.
 struct RequestLoggerService<S> {
    /// The wrapped inner service that actually handles the request.
    service: S,
    // Zero-sized marker; it stores no data.
    _marker: std::marker::PhantomData<fn(ServiceRequest)>,
 }
 /// Logs `method path status elapsed` at info level (target `http_request`)
 /// for every successfully completed request that is not on a noisy path.
 ///
 /// Skipped paths: exactly `/health` and `/metrics`, plus anything starting
 /// with `/ws` or `/assets`. Errors from the inner service are propagated
 /// without being logged here.
 impl<S, B> actix_web::dev::Service<ServiceRequest> for RequestLoggerService<S>
 where
    S: actix_web::dev::Service<
            ServiceRequest,
            Response = ServiceResponse<B>,
            Error = actix_web::Error,
        >,
    S::Future: 'static,
    B: 'static,
 {
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
    /// Readiness is delegated to the wrapped service.
    fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.service.poll_ready(cx)
    }
    /// Starts the timer, forwards the request, and logs once the response is available.
    fn call(&self, req: ServiceRequest) -> Self::Future {
        let path = req.path();
        let is_noisy = path == "/health"
            || path == "/metrics"
            || path.starts_with("/ws")
            || path.starts_with("/assets");
        // Copy method/path only when the request will be logged, so skipped
        // paths (health checks, metrics scrapes, assets) allocate nothing.
        let logged = (!is_noisy).then(|| (req.method().to_string(), path.to_string()));
        let start = Instant::now();
        let fut = self.service.call(req);
        Box::pin(async move {
            let res = fut.await?;
            if let Some((method, path)) = logged {
                // Read status and elapsed time once so the structured fields
                // and the formatted message always report the same values.
                let status = res.status().as_u16();
                let elapsed = start.elapsed();
                tracing::info!(
                    target: "http_request",
                    method = %method,
                    path = %path,
                    status = status,
                    elapsed = ?elapsed,
                    "{} {} {} {:?}",
                    method,
                    path,
                    status,
                    elapsed
                );
            }
            Ok(res)
        })
    }
 }
 /// Produces the cookie key used by the session middleware.
 ///
 /// * `APP_SESSION_SECRET` set and at least 32 bytes long: a 64-byte key is
 ///   derived from it with HKDF-SHA256 (salt `session-cookie-key`, info
 ///   `actix-session-signing-key`).
 /// * Unset, or shorter than 32 bytes: a warning is logged and a freshly
 ///   generated key is returned instead.
 ///
 /// # Errors
 /// Returns an error only if the HKDF expand step fails.
 fn build_session_key(cfg: &AppConfig) -> anyhow::Result<Key> {
    use hkdf::Hkdf;
    use sha2::Sha256;
    let Some(secret) = cfg.env.get("APP_SESSION_SECRET") else {
        tracing::warn!(
            "APP_SESSION_SECRET not set, using generated key (sessions invalidated on restart)"
        );
        return Ok(Key::generate());
    };
    if secret.len() < 32 {
        tracing::warn!(
            secret_len = secret.len(),
            "APP_SESSION_SECRET is too short (<32 bytes), using generated key instead"
        );
        return Ok(Key::generate());
    }
    // HKDF-SHA256: the info string provides domain separation for this key.
    let mut key_material = [0u8; 64];
    Hkdf::<Sha256>::new(Some(b"session-cookie-key"), secret.as_bytes())
        .expand(b"actix-session-signing-key", &mut key_material)
        .map_err(|e| anyhow::anyhow!("HKDF expand failed: {}", e))?;
    Ok(Key::from(&key_material))
 }
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    let cfg = AppConfig::load();
    let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string());
    let otel_enabled = cfg.otel_enabled().unwrap_or(false);
    init_tracing_subscriber(&log_level, false);
    tracing::info!(
        app_name = %cfg.app_name().unwrap_or_default(),
        app_version = %cfg.app_version().unwrap_or_default(),
        "Starting application"
    );
    let db = AppDatabase::init(&cfg).await?;
    tracing::info!("Database connected");
    let redis_urls = cfg.redis_urls()?;
    let store: RedisClusterSessionStore = RedisClusterSessionStore::new(redis_urls).await?;
    tracing::info!("Redis connected");
    let cache = AppCache::init(&cfg).await?;
    tracing::info!("Cache initialized");
    run_migrations(&db).await?;
    let session_key = build_session_key(&cfg)?;
    let args = ServerArgs::parse();
    let service = AppService::new(cfg.clone()).await?;
    tracing::info!("AppService initialized");
    let _model_sync_handle = service.clone().start_sync_task();
    // TODO: workspace module not yet wired — billing alert task pending
    // let _billing_alert_handle = service.clone().start_billing_alert_task();
    let (shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel::<()>(1);
    let worker_service = service.clone();
    let worker_handle =
        tokio::spawn(async move { worker_service.start_room_workers(shutdown_rx).await });
    let _otel_guard = if otel_enabled {
        let endpoint = cfg
            .otel_endpoint()
            .unwrap_or_else(|_| "http://localhost:4317".to_string());
        let service_name = cfg
            .otel_service_name()
            .unwrap_or_else(|_| "app".to_string());
        let service_version = cfg
            .otel_service_version()
            .unwrap_or_else(|_| "0.1.0".to_string());
        tracing::info!(endpoint = %endpoint, service = %service_name, "OTLP tracing enabled");
        let guard =
            observability::init_otlp(&endpoint, &service_name, &service_version, &log_level)
                .map_err(|e| anyhow::anyhow!("OTLP init failed: {}", e))?;
        guard
    } else {
        None
    };
    let prometheus_handle = install_recorder();
    let prometheus_handle_data = web::Data::new(prometheus_handle.clone());
    let http_metrics = std::sync::Arc::new(HttpMetrics::new());
    let http_snapshot: HttpSnapshotGuard = std::sync::Arc::new(std::sync::RwLock::new(
        observability::HttpMetricsSnapshot::default(),
    ));
    let http_snapshot_for_poller = http_snapshot.clone();
    spawn_http_metrics_poller(
        http_metrics.clone(),
        http_snapshot_for_poller,
        std::time::Duration::from_secs(15),
    );
    let http_snapshot_data = web::Data::new(http_snapshot);
    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Some(push_url) = std::env::var("METRICS_PUSH_URL").ok() {
        let pusher = MetricsPusher::new(&push_url, "app");
        pusher.spawn(
            http_metrics.clone(),
            Arc::new(prometheus_handle.clone()),
            std::time::Duration::from_secs(15),
        );
        tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
    }
    let bind_addr = args.bind.unwrap_or_else(|| "127.0.0.1:8080".to_string());
    tracing::info!(bind_addr = %bind_addr, "Listening");
    let http_metrics_server = http_metrics.clone();
    let cors_origins: Vec<String> = cfg
        .env
        .get("CORS_ORIGINS")
        .map(|s| {
            s.split(',')
                .map(|s| s.trim().to_string())
                .filter(|s| !s.is_empty())
                .collect()
        })
        .unwrap_or_else(|| vec!["http://localhost:5173".to_string()]);
    let cookie_secure = cfg
        .env
        .get("APP_COOKIE_SECURE")
        .map(|s| s != "false")
        .unwrap_or(true);
    tracing::info!(cookie_secure = cookie_secure, "Cookie secure mode");
    HttpServer::new(move || {
        let mut cors = Cors::default();
        for origin in &cors_origins {
            cors = cors.allowed_origin(origin);
        }
        let cors = cors
            .allowed_methods(["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"])
            .allowed_headers([
                "Content-Type",
                "Authorization",
                "X-Requested-With",
                "Accept",
                "Origin",
            ])
            .supports_credentials()
            .max_age(3600);
        let security_headers = actix_web::middleware::DefaultHeaders::new()
            .add(("X-Content-Type-Options", "nosniff"))
            .add(("X-Frame-Options", "DENY"))
            .add(("Referrer-Policy", "strict-origin-when-cross-origin"));
        let session_mw = SessionMiddleware::builder(store.clone(), session_key.clone())
            .cookie_name("id".to_string())
            .cookie_path("/".to_string())
            .cookie_secure(cookie_secure)
            .cookie_http_only(true)
            .session_lifecycle(SessionLifecycle::PersistentSession(
                PersistentSession::default()
                    .session_ttl(Duration::days(30))
                    .session_ttl_extension_policy(TtlExtensionPolicy::OnEveryRequest),
            ))
            .build();
        let metrics_mw = MetricsMiddleware::new(http_metrics_server.clone());
        App::new()
            .wrap(cors)
            .wrap(security_headers)
            .wrap(session_mw)
            .wrap(RequestLogger)
            .wrap(metrics_mw)
            .wrap(TracingSpanMiddleware::new())
            .app_data(web::Data::new(AppState {
                db: db.clone(),
                cache: cache.clone(),
            }))
            .app_data(web::Data::new(service.clone()))
            .app_data(web::Data::new(cfg.clone()))
            .app_data(web::Data::new(db.clone()))
            .app_data(web::Data::new(cache.clone()))
            .app_data(http_snapshot_data.clone())
            .app_data(prometheus_handle_data.clone())
            .route("/robots.txt", web::get().to(robots::robots))
            .route("/sitemap.xml", web::get().to(sidemap::sitemap))
            .service(
                web::scope("/sidemap")
                    .route("", web::get().to(sidemap::sitemap))
                    .route("/static", web::get().to(sidemap::sitemap_static))
                    .route("/users", web::get().to(sidemap::sitemap_users))
                    .route("/projects", web::get().to(sidemap::sitemap_projects))
                    .route("/repos", web::get().to(sidemap::sitemap_repos)),
            )
            .route("/health", web::get().to(health_check))
            .route("/metrics", web::get().to(prometheus_handler))
            .configure(api::route::init_routes)
    })
    .bind(&bind_addr)?
    .run()
    .await?;
    tracing::info!("Server stopped, shutting down room workers");
    let _ = shutdown_tx.send(());
    let _ = worker_handle.await;
    tracing::info!("Room workers stopped");
    Ok(())
 }
 /// Applies all pending migrations through the writer connection.
 ///
 /// # Errors
 /// Returns `Migration failed: …` (debug-formatted cause) if the migrator fails.
 async fn run_migrations(db: &AppDatabase) -> anyhow::Result<()> {
    tracing::info!("Running database migrations...");
    if let Err(e) = Migrator::up(db.writer(), None).await {
        return Err(anyhow::anyhow!("Migration failed: {:?}", e));
    }
    tracing::info!("Migrations completed");
    Ok(())
 }
 /// `GET /health` handler.
 ///
 /// Probes the database and then the cache. Responds `200` with
 /// `status: "ok"` when both succeed, otherwise `503` with
 /// `status: "unhealthy"`; `db` and `cache` are each `"ok"` or `"error"`.
 async fn health_check(state: web::Data<AppState>) -> HttpResponse {
    let db_ok = db_ping(&state.db).await;
    let cache_ok = cache_ping(&state.cache).await;
    let component = |ok: bool| if ok { "ok" } else { "error" };
    let (mut response, overall) = match (db_ok, cache_ok) {
        (true, true) => (HttpResponse::Ok(), "ok"),
        _ => (HttpResponse::ServiceUnavailable(), "unhealthy"),
    };
    response.json(serde_json::json!({
        "status": overall,
        "db": component(db_ok),
        "cache": component(cache_ok),
    }))
 }
 /// Runs `SELECT 1` on the writer and then on the reader connection.
 ///
 /// Both probes are always executed; the result is `true` only if both succeed.
 async fn db_ping(db: &AppDatabase) -> bool {
    let writer = db.writer().execute_unprepared("SELECT 1").await;
    let reader = db.reader().execute_unprepared("SELECT 1").await;
    matches!((writer, reader), (Ok(_), Ok(_)))
 }
 /// Reports whether a cache connection can currently be obtained.
 async fn cache_ping(cache: &AppCache) -> bool {
    matches!(cache.conn().await, Ok(_))
 }
--- a/apps/email/Cargo.toml
+++ b/apps/email/Cargo.toml
@ -1,36 +0,0 @@
 [package]
 name = "email-server"
 version.workspace = true
 edition.workspace = true
 authors.workspace = true
 description.workspace = true
 repository.workspace = true
 readme.workspace = true
 homepage.workspace = true
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 documentation.workspace = true
 [[bin]]
 name = "email-worker"
 path = "src/main.rs"
 [dependencies]
 tokio = { workspace = true, features = ["full"] }
 service = { workspace = true }
 db = { workspace = true }
 config = { workspace = true }
 tracing = { workspace = true }
 observability = { workspace = true }
 anyhow = { workspace = true }
 clap = { workspace = true, features = ["derive"] }
 chrono = { workspace = true, features = ["serde"] }
 hyper = { workspace = true }
 serde_json = { workspace = true }
 sea-orm = { workspace = true }
 metrics = "0.22"
 metrics-exporter-prometheus = "0.13"
 [lints]
 workspace = true
--- a/apps/email/src/main.rs
+++ b/apps/email/src/main.rs
@ -1,165 +0,0 @@
 use clap::Parser;
 use config::AppConfig;
 use metrics::{Unit, describe_counter};
 use metrics_exporter_prometheus::PrometheusHandle;
 use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
 use sea_orm::ConnectionTrait;
 use service::AppService;
 use std::sync::Arc;
 // Command-line arguments of the `email-worker` binary.
 // Plain `//` comments are used on purpose: clap's derive turns `///` doc comments
 // into help text, which would change the CLI output.
 #[derive(Parser, Debug)]
 #[command(name = "email-worker")]
 #[command(version)]
 struct Args {
    // `--log-level`, default "info"; `main` hands it to `init_tracing_subscriber`.
    #[arg(long, default_value = "info")]
    log_level: String,
 }
 /// Serves the worker's auxiliary HTTP endpoints.
 ///
 /// * `/health` — runs `SELECT 1` on the writer and reader connections and requests a
 ///   cache connection. Answers `200` when all of them succeed and `503` otherwise; the
 ///   JSON body carries an overall `status` plus `db` and `cache` flags.
 /// * `/metrics` — returns `metrics.render()` with the Prometheus text content type.
 /// * any other path — `404` with the body `not found`.
 ///
 /// The error type is `Infallible`: every failure is reported as an HTTP response.
 async fn http_handler(
    db: Arc<db::database::AppDatabase>,
    cache: Arc<db::cache::AppCache>,
    metrics: Arc<PrometheusHandle>,
    req: hyper::Request<hyper::Body>,
 ) -> Result<hyper::Response<hyper::Body>, std::convert::Infallible> {
    match req.uri().path() {
        "/health" => {
            let writer_ok = db.writer().execute_unprepared("SELECT 1").await.is_ok();
            let reader_ok = db.reader().execute_unprepared("SELECT 1").await.is_ok();
            let db_ok = writer_ok && reader_ok;
            let cache_ok = cache.conn().await.is_ok();
            // Computed once so the JSON `status` field and the HTTP code cannot diverge.
            let healthy = db_ok && cache_ok;
            let body = serde_json::json!({
                "status": if healthy { "ok" } else { "unhealthy" },
                "db": if db_ok { "ok" } else { "error" },
                "cache": if cache_ok { "ok" } else { "error" },
            });
            let status = if healthy { 200 } else { 503 };
            let body_bytes = match serde_json::to_string(&body) {
                Ok(s) => hyper::Body::from(s),
                Err(e) => {
                    return Ok(hyper::Response::builder()
                        .status(500)
                        .body(hyper::Body::from(format!("serialize error: {}", e)))
                        .expect("static response"));
                }
            };
            Ok(hyper::Response::builder()
                .status(status)
                .header("content-type", "application/json")
                .body(body_bytes)
                .expect("static response"))
        }
        "/metrics" => {
            let body = metrics.render();
            // Status and header are compile-time constants, so the builder cannot fail;
            // `expect` states that invariant like the other responses in this handler.
            Ok(hyper::Response::builder()
                .status(200)
                .header("content-type", "text/plain; version=0.0.4; charset=utf-8")
                .body(hyper::Body::from(body))
                .expect("static response"))
        }
        _ => Ok(hyper::Response::builder()
            .status(404)
            .body(hyper::Body::from("not found"))
            .expect("static response")),
    }
 }
 /// Entry point of the email worker.
 ///
 /// Startup order: parse CLI args, load config, initialise tracing, describe the email
 /// counters, install the metrics recorder, optionally start the metrics pusher
 /// (when `METRICS_PUSH_URL` is set), build `AppService`, spawn the shutdown-signal
 /// task and the health/metrics HTTP server on port 8084, then run the email workers
 /// until they return.
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let cfg = AppConfig::load();
    init_tracing_subscriber(&args.log_level, false);
    // Pre-register all email/queue metrics so they appear in /metrics even before first event.
    describe_counter!(
        "email_queued_total",
        Unit::Count,
        "Emails written to Redis stream"
    );
    describe_counter!(
        "email_consumed_total",
        Unit::Count,
        "Emails consumed from queue"
    );
    describe_counter!(
        "email_batch_size",
        Unit::Count,
        "Email consumer batch sizes accumulated"
    );
    describe_counter!(
        "email_validation_skipped_total",
        Unit::Count,
        "Emails skipped due to invalid recipient"
    );
    describe_counter!(
        "email_build_errors_total",
        Unit::Count,
        "Email message build failures"
    );
    describe_counter!(
        "email_send_attempts_total",
        Unit::Count,
        "SMTP send attempts (including retries)"
    );
    describe_counter!("email_sent_total", Unit::Count, "Emails sent successfully");
    describe_counter!(
        "email_send_failures_total",
        Unit::Count,
        "Emails that failed after all retries"
    );
    let metrics_handle = Arc::new(install_recorder());
    let http_metrics = Arc::new(HttpMetrics::new()); // Worker app — HTTP section will be empty
    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Ok(push_url) = std::env::var("METRICS_PUSH_URL") {
        let pusher = MetricsPusher::new(&push_url, "email");
        pusher.spawn(
            http_metrics.clone(),
            metrics_handle.clone(),
            std::time::Duration::from_secs(15),
        );
        tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
    }
    tracing::info!("Starting email worker");
    let service = AppService::new(cfg).await?;
    let db = Arc::new(service.db.clone());
    let cache = Arc::new(service.cache.clone());
    let (shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel::<()>(1);
    // Broadcast shutdown on Ctrl+C or, on Unix, SIGTERM. Container runtimes stop
    // processes with SIGTERM, so without it the workers would never be told to stop.
    tokio::spawn(async move {
        let ctrl_c = async {
            tokio::signal::ctrl_c().await.ok();
        };
        #[cfg(unix)]
        let term = async {
            use tokio::signal::unix::{SignalKind, signal};
            match signal(SignalKind::terminate()) {
                Ok(mut sig) => {
                    sig.recv().await;
                }
                Err(e) => {
                    // Keep Ctrl+C working even if the SIGTERM handler cannot be installed.
                    tracing::warn!("failed to install SIGTERM handler: {}", e);
                    std::future::pending::<()>().await;
                }
            }
        };
        #[cfg(not(unix))]
        let term = std::future::pending::<()>();
        tokio::select! {
            _ = ctrl_c => {}
            _ = term => {}
        }
        tracing::info!("shutting down email worker");
        let _ = shutdown_tx.send(());
    });
    // Start health/metrics server on a dedicated port
    let health_db = db.clone();
    let health_cache = cache.clone();
    let health_metrics = metrics_handle.clone();
    let health_addr: std::net::SocketAddr = ([0, 0, 0, 0], 8084).into();
    let health_service = hyper::service::make_service_fn(move |_| {
        let db = health_db.clone();
        let cache = health_cache.clone();
        let metrics = health_metrics.clone();
        let service = hyper::service::service_fn(move |req| {
            http_handler(db.clone(), cache.clone(), metrics.clone(), req)
        });
        async move { Ok::<_, std::convert::Infallible>(service) }
    });
    let health_server = hyper::Server::bind(&health_addr).serve(health_service);
    tracing::info!(port = 8084, "health/metrics server started");
    tokio::spawn(async move {
        if let Err(e) = health_server.await {
            tracing::error!("health check server error: {}", e);
        }
    });
    service.start_email_workers(shutdown_rx).await?;
    tracing::info!("email worker stopped");
    Ok(())
 }
--- a/apps/git-hook/Cargo.toml
+++ b/apps/git-hook/Cargo.toml
@ -1,35 +0,0 @@
 [package]
 name = "git-hook"
 version.workspace = true
 edition.workspace = true
 authors.workspace = true
 description.workspace = true
 repository.workspace = true
 readme.workspace = true
 homepage.workspace = true
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 documentation.workspace = true
 [dependencies]
 tokio = { workspace = true, features = ["full"] }
 git = { workspace = true }
 observability = { workspace = true }
 db = { workspace = true }
 config = { workspace = true }
 tracing = { workspace = true }
 tracing-subscriber = { workspace = true, features = ["json"] }
 anyhow = { workspace = true }
 clap = { workspace = true, features = ["derive"] }
 tokio-util = { workspace = true }
 hyper = { workspace = true }
 serde_json = { workspace = true }
 sea-orm = { workspace = true }
 metrics = "0.22"
 metrics-exporter-prometheus = "0.13"
 chrono = { workspace = true, features = ["serde"] }
 reqwest = { workspace = true }
 agent = { workspace = true }
 models = { workspace = true }
 async-trait = { workspace = true }
--- a/apps/git-hook/src/args.rs
+++ b/apps/git-hook/src/args.rs
@ -1,10 +0,0 @@
 use clap::Parser;
 // Command-line arguments of the `git-hook` worker binary.
 // Plain `//` comments are used for new notes: clap's derive turns `///` doc comments
 // into help text, so adding those would change the CLI output.
 #[derive(Parser, Debug)]
 #[command(name = "git-hook")]
 #[command(version)]
 pub struct HookArgs {
    // NOTE(review): no `env = ...` or default attribute is attached below, so the
    // fallback described in the help text is not implemented by this struct —
    // confirm where (or whether) it is applied.
    /// Worker ID for this instance. Defaults to the HOOK_POOL_WORKER_ID env var or a generated UUID.
    #[arg(long)]
    pub worker_id: Option<String>,
 }
--- a/apps/git-hook/src/main.rs
+++ b/apps/git-hook/src/main.rs
@ -1,256 +0,0 @@
 use clap::Parser;
 use config::AppConfig;
 use db::cache::AppCache;
 use db::database::AppDatabase;
 use git::hook::HookService;
 use git::hook::embed::TagEmbedder;
 use metrics::{Unit, describe_counter};
 use metrics_exporter_prometheus::PrometheusHandle;
 use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
 use sea_orm::ConnectionTrait;
 use std::sync::Arc;
 use tokio::signal;
 mod args;
 use args::HookArgs;
 /// Initialize EmbedService from config (graceful degradation).
 ///
 /// Falls back to the model name `"text-embedding-3-small"` and `1536` dimensions when
 /// the corresponding config getters fail.
 ///
 /// # Errors
 /// Returns an error only when the embed client cannot be created. A failure of
 /// `ensure_collections` is logged as a warning and does not abort initialization.
 async fn init_embed_service(
    cfg: &AppConfig,
    db: &AppDatabase,
 ) -> Result<agent::embed::EmbedService, Box<dyn std::error::Error + Send + Sync>> {
    let client = agent::new_embed_client(cfg).await?;
    let model_name = cfg
        .get_embed_model_name()
        .unwrap_or_else(|_| "text-embedding-3-small".into());
    let dimensions = cfg.get_embed_model_dimensions().unwrap_or(1536);
    let svc = agent::embed::EmbedService::new(client, db.writer().clone(), model_name, dimensions);
    // Best effort: keep going, but leave a trace instead of discarding the error.
    if let Err(e) = svc.ensure_collections().await {
        tracing::warn!("hook worker: ensure_collections failed: {:?}", e);
    }
    tracing::info!("hook worker: EmbedService initialized for tag embedding");
    Ok(svc)
 }
 /// Newtype around agent's `EmbedService` so it can be used as git's `TagEmbedder`.
 struct EmbedServiceAdapter(agent::embed::EmbedService);
 #[async_trait::async_trait]
 impl TagEmbedder for EmbedServiceAdapter {
    /// Copies every `models::TagEmbedInput` field by field into agent's
    /// `TagEmbedInput`, forwards the batch to the wrapped service and boxes any error.
    async fn embed_tags_batch(
        &self,
        tags: Vec<models::TagEmbedInput>,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        // The two input types live in different crates, hence the manual re-packing.
        let mut converted: Vec<agent::embed::TagEmbedInput> = Vec::with_capacity(tags.len());
        for tag in tags {
            converted.push(agent::embed::TagEmbedInput {
                repo_id: tag.repo_id,
                repo_name: tag.repo_name,
                project_id: tag.project_id,
                name: tag.name,
                description: tag.description,
            });
        }
        match self.0.embed_tags_batch(converted).await {
            Ok(()) => Ok(()),
            Err(err) => Err(Box::new(err) as Box<dyn std::error::Error + Send + Sync>),
        }
    }
 }
 async fn http_handler(
    db: Arc<AppDatabase>,
    cache: Arc<AppCache>,
    metrics: Arc<PrometheusHandle>,
    req: hyper::Request<hyper::Body>,
 ) -> Result<hyper::Response<hyper::Body>, std::convert::Infallible> {
    match req.uri().path() {
        "/health" => {
            let writer_ok = db.writer().execute_unprepared("SELECT 1").await.is_ok();
            let reader_ok = db.reader().execute_unprepared("SELECT 1").await.is_ok();
            let db_ok = writer_ok && reader_ok;
            let cache_ok = cache.conn().await.is_ok();
            let body = serde_json::json!({
                "status": if db_ok && cache_ok { "ok" } else { "unhealthy" },
                "db": if db_ok { "ok" } else { "error" },
                "cache": if cache_ok { "ok" } else { "error" },
            });
            let status = if db_ok && cache_ok { 200 } else { 503 };
            let body_bytes = match serde_json::to_string(&body) {
                Ok(s) => hyper::Body::from(s),
                Err(e) => {
                    return Ok(hyper::Response::builder()
                        .status(500)
                        .body(hyper::Body::from(format!("serialize error: {}", e)))
                        .expect("static response"));
                }
            };
            Ok(hyper::Response::builder()
                .status(status)
                .header("content-type", "application/json")
                .body(body_bytes)
                .expect("static response"))
        }
        "/metrics" => {
            let body = metrics.render();
            Ok(hyper::Response::builder()
                .status(200)
                .header("content-type", "text/plain; version=0.0.4; charset=utf-8")
                .body(hyper::Body::from(body))
                .expect("static response"))
        }
        _ => Ok(hyper::Response::builder()
            .status(404)
            .body(hyper::Body::from("not found"))
            .expect("static response")),
    }
 }
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    let cfg = AppConfig::load();
    let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string());
    init_tracing_subscriber(&log_level, false);
    // Pre-register all hook metrics so they appear in /metrics even before first increment.
    describe_counter!("hook_tasks_total", Unit::Count, "Total hook tasks dequeued");
    describe_counter!(
        "hook_tasks_success_total",
        Unit::Count,
        "Hook tasks completed successfully"
    );
    describe_counter!(
        "hook_tasks_failed_total",
        Unit::Count,
        "Hook tasks that failed"
    );
    describe_counter!(
        "hook_tasks_locked_total",
        Unit::Count,
        "Hook tasks re-queued due to repo lock"
    );
    describe_counter!(
        "hook_tasks_retried_total",
        Unit::Count,
        "Hook tasks that entered retry"
    );
    describe_counter!(
        "hook_tasks_exhausted_total",
        Unit::Count,
        "Hook tasks that exhausted retries"
    );
    describe_counter!(
        "hook_sync_branches_changed_total",
        Unit::Count,
        "Branches changed during sync"
    );
    describe_counter!(
        "hook_sync_tags_changed_total",
        Unit::Count,
        "Tags changed during sync"
    );
    let metrics_handle = Arc::new(install_recorder());
    let http_metrics = Arc::new(HttpMetrics::new()); // Worker app — HTTP section will be empty
    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Some(push_url) = std::env::var("METRICS_PUSH_URL").ok() {
        let pusher = MetricsPusher::new(&push_url, "git-hook");
        pusher.spawn(
            http_metrics.clone(),
            metrics_handle.clone(),
            std::time::Duration::from_secs(15),
        );
        tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
    }
    let db = Arc::new(AppDatabase::init(&cfg).await?);
    tracing::info!("database connected");
    // 4. Connect to Redis cache (also provides the cluster pool for hook queue)
    let cache = Arc::new(AppCache::init(&cfg).await?);
    tracing::info!("cache connected");
    // 5. Parse CLI args
    let _args = HookArgs::parse();
    tracing::info!("git-hook worker starting");
    // 6. Build and start git hook service
    let mut hooks = HookService::new(
        (*db).clone(),
        (*cache).clone(),
        cache.redis_pool().clone(),
        cfg.clone(),
    );
    // Optionally initialize tag embedding
    if let Ok(embed_svc) = init_embed_service(&cfg, &db).await {
        let adapter = EmbedServiceAdapter(embed_svc);
        hooks = hooks.with_tag_embedder(Arc::new(adapter));
    }
    let cancel = hooks.start_worker().await;
    let cancel_signal = cancel.clone();
    // 7. Start health/metrics server on a dedicated port
    let health_db = db.clone();
    let health_cache = cache.clone();
    let health_metrics = metrics_handle.clone();
    let health_addr: std::net::SocketAddr = ([0, 0, 0, 0], 8083).into();
    let health_service = hyper::service::make_service_fn(move |_| {
        let db = health_db.clone();
        let cache = health_cache.clone();
        let metrics = health_metrics.clone();
        let service = hyper::service::service_fn(move |req| {
            http_handler(db.clone(), cache.clone(), metrics.clone(), req)
        });
        async move { Ok::<_, std::convert::Infallible>(service) }
    });
    let health_server = hyper::Server::bind(&health_addr).serve(health_service);
    tracing::info!(port = 8083, "health/metrics server started");
    tokio::spawn(async move {
        if let Err(e) = health_server.await {
            tracing::error!("health check server error: {}", e);
        }
    });
    // Spawn signal handler that cancels on SIGINT/SIGTERM
    tokio::spawn(async move {
        let ctrl_c = async {
            signal::ctrl_c()
                .await
                .expect("failed to install CTRL+C handler");
        };
        #[cfg(unix)]
        let term = async {
            use tokio::signal::unix::{SignalKind, signal};
            let mut sig =
                signal(SignalKind::terminate()).expect("failed to install SIGTERM handler");
            sig.recv().await;
        };
        #[cfg(not(unix))]
        let term = std::future::pending::<()>();
        tokio::select! {
            _ = ctrl_c => {
                tracing::info!("received SIGINT, initiating shutdown");
            }
            _ = term => {
                tracing::info!("received SIGTERM, initiating shutdown");
            }
        }
        cancel_signal.cancel();
    });
    // Wait until the worker is cancelled (by signal handler or otherwise)
    cancel.cancelled().await;
    tracing::info!("git-hook worker stopped");
    Ok(())
 }
--- a/apps/gitserver/Cargo.toml
+++ b/apps/gitserver/Cargo.toml
@ -1,31 +0,0 @@
 [package]
 name = "gitserver"
 version.workspace = true
 edition.workspace = true
 authors.workspace = true
 description.workspace = true
 repository.workspace = true
 readme.workspace = true
 homepage.workspace = true
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 documentation.workspace = true
 [[bin]]
 name = "gitserver"
 path = "src/main.rs"
 [dependencies]
 tokio = { workspace = true, features = ["full"] }
 git = { workspace = true }
 observability = { workspace = true }
 tracing = { workspace = true }
 db = { workspace = true }
 config = { workspace = true }
 anyhow = { workspace = true }
 clap = { workspace = true, features = ["derive"] }
 chrono = { workspace = true, features = ["serde"] }
 [lints]
 workspace = true
--- a/apps/gitserver/src/main.rs
+++ b/apps/gitserver/src/main.rs
@ -1,59 +0,0 @@
 use clap::Parser;
 use config::AppConfig;
 use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
 use std::sync::Arc;
 // Command-line arguments of the `gitserver` binary.
 // Plain `//` comments are used on purpose: clap's derive turns `///` doc comments
 // into help text, which would change the CLI output.
 #[derive(Parser, Debug)]
 #[command(name = "gitserver")]
 #[command(version)]
 struct Args {
    // `--log-level`, default "info"; `main` hands it to `init_tracing_subscriber`.
    #[arg(long, default_value = "info")]
    log_level: String,
 }
 /// Entry point of the git server.
 ///
 /// Initialises tracing and the metrics recorder, optionally starts the metrics pusher
 /// (when `METRICS_PUSH_URL` is set), then spawns the HTTP and SSH servers as separate
 /// tasks. The function returns as soon as either task finishes (its outcome is
 /// logged) or Ctrl+C is received.
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let cfg = AppConfig::load();
    init_tracing_subscriber(&args.log_level, false);
    let prometheus_handle = Arc::new(install_recorder());
    let http_metrics = Arc::new(HttpMetrics::new());
    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Ok(push_url) = std::env::var("METRICS_PUSH_URL") {
        let pusher = MetricsPusher::new(&push_url, "gitserver");
        pusher.spawn(
            http_metrics.clone(),
            prometheus_handle.clone(),
            std::time::Duration::from_secs(15),
        );
        tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
    }
    let http_handle = tokio::spawn(git::http::run_http(cfg.clone()));
    let ssh_handle = tokio::spawn(git::ssh::run_ssh(cfg));
    // Whichever branch completes first ends the process; the outer `Result` of a
    // join handle is the task's join status, the inner one is the server's own result.
    tokio::select! {
        result = http_handle => {
            match result {
                Ok(Ok(())) => tracing::info!("HTTP server stopped"),
                Ok(Err(e)) => tracing::error!("HTTP server error: {}", e),
                Err(e) => tracing::error!("HTTP server task panicked: {}", e),
            }
        }
        result = ssh_handle => {
            match result {
                Ok(Ok(())) => tracing::info!("SSH server stopped"),
                Ok(Err(e)) => tracing::error!("SSH server error: {}", e),
                Err(e) => tracing::error!("SSH server task panicked: {}", e),
            }
        }
        _ = tokio::signal::ctrl_c() => {
            tracing::info!("received shutdown signal");
        }
    }
    tracing::info!("shutting down");
    Ok(())
 }
--- a/apps/metrics/Cargo.toml
+++ b/apps/metrics/Cargo.toml
@ -1,58 +0,0 @@
 [package]
 name = "metrics-aggregator"
 version.workspace = true
 edition.workspace = true
 authors.workspace = true
 description = "Unified observability aggregator: scrapes metrics, forwards traces, collects logs"
 repository.workspace = true
 readme.workspace = true
 homepage.workspace = true
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 documentation.workspace = true
 [[bin]]
 name = "metrics-aggregator"
 path = "src/main.rs"
 [dependencies]
 tokio = { workspace = true, features = ["full"] }
 config = { workspace = true }
 tracing = { workspace = true }
 tracing-subscriber = { workspace = true, features = ["env-filter", "json"] }
 observability = { workspace = true }
 anyhow = { workspace = true }
 clap = { workspace = true, features = ["derive", "env"] }
 serde_json = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 serde = { workspace = true, features = ["derive"] }
 # HTTP server
 actix-web = "4.13.0"
 actix-rt = "2.11.0"
 # HTTP client for scraping (uses awc = actix-web client, no extra TLS deps)
 awc = { workspace = true }
 # HTTP client for Loki (reqwest is Send+Sync, unlike awc::Client)
 reqwest = { workspace = true, features = ["json"] }
 # Metrics
 metrics = { workspace = true }
 metrics-exporter-prometheus = { version = "0.18", default-features = false, features = ["http-listener", "tokio"] }
 # Observability
 opentelemetry = { workspace = true }
 opentelemetry_sdk = { workspace = true }
 opentelemetry-otlp = { version = "0.31.0", default-features = false, features = ["http-proto", "tokio", "trace", "tonic"] }
 tracing-opentelemetry = "0.32.1"
 tokio-util = { workspace = true }
 tokio-stream = { workspace = true }
 futures = { workspace = true }
 url = { workspace = true }
 tower = { workspace = true }
 [lints]
 workspace = true
--- a/apps/metrics/src/args.rs
+++ b/apps/metrics/src/args.rs
@ -1,35 +0,0 @@
 use clap::Parser;
 // Command-line / environment configuration of the `metrics-aggregator` binary.
 // New notes use plain `//` comments: clap's derive turns `///` doc comments into
 // help text, so adding those would change the CLI output.
 #[derive(Parser, Debug)]
 #[command(name = "metrics-aggregator")]
 #[command(version)]
 pub struct Args {
    // `--port` / `METRICS_AGGREGATOR_PORT`, default 9090.
    #[arg(long, default_value = "9090", env = "METRICS_AGGREGATOR_PORT")]
    pub port: u16,
    // `--otel-endpoint` / `OTEL_EXPORTER_OTLP_ENDPOINT`; optional.
    #[arg(long, env = "OTEL_EXPORTER_OTLP_ENDPOINT")]
    pub otel_endpoint: Option<String>,
    // `--loki-url` / `LOKI_URL`; optional.
    #[arg(long, env = "LOKI_URL")]
    pub loki_url: Option<String>,
    // `--scrape-interval-secs` / `SCRAPE_INTERVAL_SECS`, default 15.
    #[arg(long, default_value = "15", env = "SCRAPE_INTERVAL_SECS")]
    pub scrape_interval_secs: u64,
    /// JSON file with scrape targets.
    #[arg(long, env = "SCRAPE_TARGETS_FILE")]
    pub targets_file: Option<String>,
    // `--log-level` / `LOG_LEVEL`, default "info".
    #[arg(long, default_value = "info", env = "LOG_LEVEL")]
    pub log_level: String,
    /// Comma-separated list of app names to scrape.
    #[arg(long, env = "SCRAPE_APPS")]
    pub scrape_apps: Option<String>,
    // `--no-otel` flag (no env binding); presumably disables OTLP export — confirm in main.
    #[arg(long)]
    pub no_otel: bool,
    // `--no-loki` flag (no env binding); presumably disables Loki forwarding — confirm in main.
    #[arg(long)]
    pub no_loki: bool,
 }
--- a/apps/metrics/src/hotreload.rs
+++ b/apps/metrics/src/hotreload.rs
@ -1,40 +0,0 @@
 use std::sync::Arc;
 use tokio::sync::RwLock;
 use crate::target::{ScrapeTarget, load_targets_from_file};
 pub async fn watch_targets_file(
    path: String,
    targets: Arc<RwLock<Vec<ScrapeTarget>>>,
    mut shutdown: tokio::sync::broadcast::Receiver<()>,
 ) {
    let mtime_path = path;
    let mut last_mtime: Option<std::time::SystemTime> = None;
    loop {
        tokio::select! {
            _ = shutdown.recv() => break,
            _ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
                let metadata = match tokio::fs::metadata(&mtime_path).await {
                    Ok(m) => m,
                    Err(_) => continue,
                };
                let current_mtime = metadata.modified().ok();
                if current_mtime != last_mtime {
                    last_mtime = current_mtime;
                    match load_targets_from_file(&mtime_path).await {
                        Ok(new_targets) => {
                            let mut guard = targets.write().await;
                            *guard = new_targets;
                            tracing::info!(path = %mtime_path, "targets file reloaded");
                        }
                        Err(e) => {
                            tracing::warn!(error = %e, "failed to reload targets file");
                        }
                    }
                }
            }
        }
    }
 }
--- a/apps/metrics/src/k8s_discovery.rs
+++ b/apps/metrics/src/k8s_discovery.rs
@ -1,70 +0,0 @@
 use std::time::Duration;
 use awc::Client;
 use crate::target::ScrapeTarget;
 /// Lists the pods of the current namespace through the Kubernetes API and turns every
 /// running pod into a [`ScrapeTarget`].
 ///
 /// The namespace comes from the `POD_NAMESPACE` environment variable and the bearer
 /// token from the mounted service-account token file. Port and path are taken from
 /// the `metrics.port` / `metrics.path` annotations, defaulting to `8080` and
 /// `/metrics`; string-valued pod labels are copied onto the target.
 ///
 /// Returns `None` when the environment variable or token is missing, when the
 /// request or JSON decoding fails, or when the response has no `items` array.
 pub async fn k8s_pod_discovery() -> Option<Vec<ScrapeTarget>> {
    let pod_namespace = std::env::var("POD_NAMESPACE").ok()?;
    let token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token";
    let token = tokio::fs::read_to_string(token_path).await.ok()?;
    // Surrounding whitespace (e.g. a trailing newline) would make the header value
    // invalid and silently disable discovery, so strip it.
    let token = token.trim();
    let client = Client::builder()
        .timeout(Duration::from_secs(5))
        .add_default_header((
            awc::http::header::AUTHORIZATION.as_str(),
            format!("Bearer {}", token),
        ))
        .finish();
    let api_url = format!(
        "https://kubernetes.default.svc/api/v1/namespaces/{}/pods",
        pod_namespace
    );
    let mut response = client.get(api_url).send().await.ok()?;
    let body_bytes = response.body().await.ok()?;
    let pod_list: serde_json::Value = serde_json::from_slice(&body_bytes).ok()?;
    let targets: Vec<ScrapeTarget> = pod_list["items"]
        .as_array()?
        .iter()
        .filter_map(|pod| {
            let name = pod["metadata"]["name"].as_str()?.to_string();
            let phase = pod["status"]["phase"].as_str()?;
            if phase != "Running" {
                return None;
            }
            let pod_ip = pod["status"]["podIP"].as_str()?;
            // NOTE(review): a pod without an `annotations` object is skipped entirely,
            // although defaults exist for the individual keys — confirm this is intended.
            let annotations = pod["metadata"]["annotations"].as_object()?;
            let port: u16 = annotations
                .get("metrics.port")
                .and_then(|v| v.as_str())
                .and_then(|s| s.parse().ok())
                .unwrap_or(8080);
            let path = annotations
                .get("metrics.path")
                .and_then(|v| v.as_str())
                .unwrap_or("/metrics");
            // Only labels with string values are carried over.
            let labels = pod["metadata"]["labels"]
                .as_object()
                .map(|m| {
                    m.iter()
                        .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
                        .collect()
                })
                .unwrap_or_default();
            Some(ScrapeTarget {
                name,
                addr: format!("{}:{}", pod_ip, port),
                metrics_path: path.to_string(),
                labels,
            })
        })
        .collect();
    Some(targets)
 }
--- a/apps/metrics/src/loki.rs
+++ b/apps/metrics/src/loki.rs
@ -1,70 +0,0 @@
 use chrono::{DateTime, Utc};
 use reqwest::Client;
 use serde::Serialize;
 use std::collections::HashMap;
 /// Client that pushes log entries to a Loki-compatible HTTP endpoint.
 #[derive(Clone)]
 pub struct LokiForwarder {
    /// Full URL that every push is POSTed to.
    url: String,
    /// HTTP client used for all pushes.
    client: Client,
    /// Label set sent as the `stream` object of every pushed stream.
    labels: HashMap<String, String>,
 }
 impl LokiForwarder {
    /// Creates a forwarder that POSTs to `url` with a 5-second request timeout.
    ///
    /// The label set is seeded with `job="metrics-aggregator"`: Loki rejects streams
    /// that carry no labels, and this type offers no other way to set one.
    ///
    /// # Panics
    /// Panics if the `reqwest` client cannot be built.
    pub fn new(url: String) -> Self {
        let labels = HashMap::from([("job".to_string(), "metrics-aggregator".to_string())]);
        Self {
            url,
            client: Client::builder()
                .timeout(std::time::Duration::from_secs(5))
                .build()
                .expect("valid reqwest client"),
            labels,
        }
    }
    /// Sends `log_entries` to Loki as a single stream.
    ///
    /// An empty batch is a no-op. Each timestamp is encoded as a string of Unix-epoch
    /// nanoseconds, the format required by Loki's push API.
    ///
    /// # Errors
    /// Fails when the request cannot be sent or Loki answers with a non-success status.
    pub async fn push(&self, log_entries: Vec<LokiEntry>) -> anyhow::Result<()> {
        if log_entries.is_empty() {
            return Ok(());
        }
        let streams: Vec<LokiStream> = vec![LokiStream {
            stream: self.labels.clone(),
            values: log_entries
                .into_iter()
                .map(|e| {
                    // Widen to i128 so the multiplication cannot overflow for any
                    // timestamp chrono can represent.
                    let nanos = i128::from(e.timestamp.timestamp()) * 1_000_000_000
                        + i128::from(e.timestamp.timestamp_subsec_nanos());
                    (nanos.to_string(), e.line)
                })
                .collect(),
        }];
        let payload = LokiPayload { streams };
        let resp = self
            .client
            .post(&self.url)
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await;
        match resp {
            Ok(r) if r.status().is_success() => Ok(()),
            Ok(r) => anyhow::bail!("Loki push failed: {}", r.status()),
            Err(e) => anyhow::bail!("Loki push error: {}", e),
        }
    }
 }
 /// JSON body of a push request: a list of streams under the `streams` key.
 #[derive(Serialize)]
 struct LokiPayload {
    /// Streams contained in this push.
    streams: Vec<LokiStream>,
 }
 /// One stream of a push request: a label set plus its log values.
 #[derive(Serialize)]
 struct LokiStream {
    /// Label name/value pairs identifying the stream.
    stream: HashMap<String, String>,
    /// `(timestamp, line)` pairs, both as strings.
    values: Vec<(String, String)>,
 }
 /// A single log line paired with its UTC timestamp.
 pub struct LokiEntry {
    /// Timestamp attached to the line.
    pub timestamp: DateTime<Utc>,
    /// The log line text.
    pub line: String,
 }
--- a/apps/metrics/src/main.rs
+++ b/apps/metrics/src/main.rs
@ -1,633 +0,0 @@
 //! Unified observability aggregator for in-cluster deployment.
 //!
 //! Collects metrics from all app pods via Prometheus scrape, forwards traces
 //! to OTLP endpoint, and streams logs from all pods to Loki-compatible backend.
 //!
 //! Usage:
 //!   METRICS_AGGREGATOR_PORT=9090 \
 //!   OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317 \
 //!   LOKI_URL=http://loki:3100/loki/api/v1/push \
 //!   SCRAPE_INTERVAL_SECS=15 \
 //!   SCRAPE_TARGETS_FILE=/etc/metrics/targets.json \
 //!   metrics-aggregator
 mod args;
 mod hotreload;
 mod k8s_discovery;
 mod loki;
 mod metrics;
 mod otel;
 mod scrape;
 mod stats_store;
 mod target;
 use serde::Deserialize;
 use std::collections::HashMap;
 use std::fmt::Write as _;
 use std::net::SocketAddr;
 use std::sync::Arc;
 use std::time::Duration;
 use actix_web::{HttpResponse, HttpServer, web};
 use clap::Parser;
 use loki::{LokiEntry, LokiForwarder};
 use metrics::AggMetrics;
 use observability::{init_tracing_subscriber, install_recorder, instance_id};
 use otel::OtelGuard;
 use scrape::{HttpClient, ScrapeResult};
 use stats_store::StatsStore;
 use target::ScrapeTarget;
 use tokio::io::AsyncBufReadExt;
 use tokio::sync::{RwLock, broadcast};
 use tokio::time::interval;
 type MetricsStore = Arc<RwLock<HashMap<String, Vec<scrape::PromMetric>>>>;
 // StatsStore is defined in stats_store.rs — per-app aggregated data.
 /// Entry point: builds the shared state, starts the background tasks and the
 /// HTTP server, then waits for Ctrl+C and shuts everything down.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
    let args = args::Args::parse();
    init_tracing_subscriber(&args.log_level, false);
    let instance = instance_id();
    tracing::info!(
        instance = %instance,
        port = args.port,
        scrape_interval = args.scrape_interval_secs,
        "metrics-aggregator starting"
    );
    let prometheus_handle = install_recorder();
    metrics::init();
    let metrics = AggMetrics::new();
    // Shared state: scraped samples per target, pushed stats per app, and the
    // current list of scrape targets.
    let store: MetricsStore = Arc::new(RwLock::new(HashMap::new()));
    let stats_store: StatsStore = Arc::new(RwLock::new(HashMap::new()));
    let targets: Arc<RwLock<Vec<ScrapeTarget>>> = Arc::new(RwLock::new(Vec::new()));
    let http = HttpClient::new(10);
    let otel_guard = init_otel_from_args(&args);
    let loki = init_loki_from_args(&args);
    // Every background task subscribes to this channel and exits when a
    // message is sent on it.
    let (shutdown_tx, _) = broadcast::channel::<()>(4);
    // Background task: every 30 seconds, evict push entries whose `last_seen`
    // is more than 300 seconds (5 minutes) old.
    let stats_store_for_evict = stats_store.clone();
    let mut evict_shutdown = shutdown_tx.subscribe();
    tokio::spawn(async move {
        let mut ticker = interval(Duration::from_secs(30));
        loop {
            tokio::select! {
                _ = evict_shutdown.recv() => break,
                _ = ticker.tick() => {
                    let cutoff = chrono::Utc::now().timestamp() - 300;
                    let mut guard = stats_store_for_evict.write().await;
                    guard.retain(|_, entry| entry.last_seen >= cutoff);
                }
            }
        }
    });
    // Target source: an explicit targets file (loaded once, then watched for
    // changes) takes precedence; otherwise, when KUBERNETES_SERVICE_HOST is
    // set, pods are discovered once at startup.
    if let Some(path) = &args.targets_file {
        match target::load_targets_from_file(path).await {
            Ok(initial_targets) => {
                let mut guard = targets.write().await;
                *guard = initial_targets;
                tracing::info!(count = guard.len(), "loaded initial targets from file");
            }
            Err(e) => {
                // A failed initial load is not fatal; the watcher below is
                // still started.
                tracing::warn!(error = %e, "failed to load targets file");
            }
        }
        let tw =
            hotreload::watch_targets_file(path.clone(), targets.clone(), shutdown_tx.subscribe());
        tokio::spawn(tw);
    } else if std::env::var("KUBERNETES_SERVICE_HOST").is_ok() {
        if let Some(k8s_targets) = k8s_discovery::k8s_pod_discovery().await {
            let mut guard = targets.write().await;
            *guard = k8s_targets.clone();
            tracing::info!(count = guard.len(), "discovered K8s pods as targets");
        }
    }
    // Optional allow-list of target names, given as a comma-separated string.
    let scrape_filter = args
        .scrape_apps
        .as_ref()
        .map(|s| s.split(',').map(|p| p.trim().to_string()).collect());
    let scrape_targets = targets.clone();
    let scrape_store = store.clone();
    let scrape_metrics = metrics.clone();
    let scrape_http = http.clone();
    let loki_clone = loki.clone();
    let shutdown_tx_clone = shutdown_tx.clone();
    let scrape_interval = args.scrape_interval_secs;
    let scrape_filter_clone = scrape_filter.clone();
    // NOTE(review): `spawn_local` is presumably required because the scrape
    // client is not `Send` — confirm.
    tokio::task::spawn_local(async move {
        scrape_loop(
            scrape_targets,
            scrape_store,
            scrape_metrics,
            scrape_http,
            scrape_interval,
            scrape_filter_clone,
            loki_clone,
            shutdown_tx_clone.subscribe(),
        )
        .await;
    });
    // Log collector: reads lines from stdin and forwards them in batches.
    let log_shutdown = shutdown_tx.subscribe();
    let log_loki = loki.clone();
    tokio::task::spawn_local(async move {
        log_collector(log_loki, log_shutdown).await;
    });
    let bind_addr: SocketAddr = ([0, 0, 0, 0], args.port).into();
    tracing::info!(addr = %bind_addr, "HTTP server starting");
    let app_targets = targets.clone();
    let app_store = store.clone();
    let app_handle = prometheus_handle.clone();
    let loki_for_push: Option<Arc<LokiForwarder>> = loki.map(Arc::new);
    let app_stats = stats_store.clone();
    // The factory closure runs once per worker, so each shared handle is
    // cloned inside it before being registered as app data.
    let server = HttpServer::new(move || {
        let targets = app_targets.clone();
        let store = app_store.clone();
        let handle = app_handle.clone();
        let stats_store = app_stats.clone();
        let loki_for_push: Option<Arc<LokiForwarder>> = loki_for_push.clone();
        actix_web::App::new()
            .app_data(web::Data::new(targets))
            .app_data(web::Data::new(store))
            .app_data(web::Data::new(handle))
            .app_data(web::Data::new(stats_store))
            .app_data(web::Data::new(loki_for_push))
            .route("/metrics", web::get().to(handle_metrics))
            .route("/api/v1/metrics", web::get().to(handle_metrics))
            .route("/api/v1/push", web::post().to(handle_push))
            .route("/api/v1/dashboard", web::get().to(handle_dashboard))
            .route("/api/v1/stats", web::get().to(handle_stats))
            .route("/health", web::get().to(handle_health))
            .route("/api/v1/health", web::get().to(handle_health))
            .route("/api/v1/targets", web::get().to(handle_targets))
    })
    .bind(&bind_addr)?
    .run();
    // Keep a handle so the server can be stopped after it is moved into its task.
    let server_handle = server.handle();
    tokio::spawn(server);
    // Block until Ctrl+C; an error from the signal future is ignored and
    // treated the same as a received signal.
    tokio::signal::ctrl_c().await.ok();
    tracing::info!("received Ctrl+C, shutting down");
    // Shutdown order: notify background tasks, stop the HTTP server
    // (`stop(true)` requests a graceful stop), then shut down the OTLP
    // provider if one was created.
    let _ = shutdown_tx.send(());
    server_handle.stop(true).await;
    if let Some(guard) = otel_guard {
        guard.shutdown().await;
    }
    tracing::info!("metrics-aggregator stopped");
    Ok(())
 }
 /// Sets up OTLP tracing unless it is disabled or no endpoint is configured.
 ///
 /// The endpoint comes from `args.otel_endpoint`, falling back to the
 /// `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable. Returns `None` when
 /// `args.no_otel` is set, when no endpoint is available, or when
 /// initialization fails (the failure is logged as a warning).
 fn init_otel_from_args(args: &args::Args) -> Option<OtelGuard> {
    if args.no_otel {
        return None;
    }
    let endpoint = match args.otel_endpoint.clone() {
        Some(configured) => configured,
        None => std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok()?,
    };
    otel::init_otel(&endpoint, "metrics-aggregator")
        .map(|guard| {
            tracing::info!(endpoint = %endpoint, "OTLP tracing enabled");
            guard
        })
        .map_err(|e| {
            tracing::warn!(error = %e, "OTLP init failed, continuing without traces");
        })
        .ok()
 }
 /// Builds the Loki forwarder unless it is disabled or no URL is configured.
 ///
 /// The URL comes from `args.loki_url`, falling back to the `LOKI_URL`
 /// environment variable. Returns `None` when `args.no_loki` is set or when
 /// neither source provides a URL.
 fn init_loki_from_args(args: &args::Args) -> Option<LokiForwarder> {
    if args.no_loki {
        return None;
    }
    let url = match args.loki_url.clone() {
        Some(configured) => configured,
        None => std::env::var("LOKI_URL").ok()?,
    };
    tracing::info!("Loki log forwarding enabled");
    let forwarder = LokiForwarder::new(url);
    Some(forwarder)
 }
 /// Serves the combined Prometheus text output.
 ///
 /// The body is the aggregator's own recorder output, followed by the
 /// re-labelled scraped samples, followed by the metrics derived from pushed
 /// payloads.
 async fn handle_metrics(
    store: web::Data<MetricsStore>,
    stats_store: web::Data<StatsStore>,
    handle: web::Data<observability::PrometheusHandle>,
 ) -> HttpResponse {
    // Extra label stamped onto every scraped sample.
    let group_labels = vec![("aggregator_instance".to_string(), "default".to_string())];
    let scraped_text = render_aggregated_metrics(store, group_labels).await;
    let pushed_text = render_pushed_metrics(stats_store).await;
    let own_text = handle.render();
    HttpResponse::Ok()
        .content_type("text/plain; version=0.0.4; charset=utf-8")
        .body(format!("{}{}{}", own_text, scraped_text, pushed_text))
 }
 /// Liveness endpoint: always answers 200 with a fixed JSON body.
 async fn handle_health() -> HttpResponse {
    const HEALTHY: &str = r#"{"status":"ok"}"#;
    let mut response = HttpResponse::Ok();
    response.content_type("application/json");
    response.body(HEALTHY)
 }
 /// Returns the current scrape target list as JSON.
 ///
 /// If serialization fails the body is empty (the status is still 200).
 async fn handle_targets(targets: web::Data<Arc<RwLock<Vec<ScrapeTarget>>>>) -> HttpResponse {
    let current = targets.read().await;
    let body = match serde_json::to_string(&*current) {
        Ok(serialized) => serialized,
        Err(_) => String::new(),
    };
    HttpResponse::Ok()
        .content_type("application/json")
        .body(body)
 }
 // ── Push endpoint payload ────────────────────────────────────────────────────
 /// JSON body accepted by `POST /api/v1/push`.
 ///
 /// Field names are camelCase on the wire (for example `token_usage` is read
 /// from `tokenUsage`). Only `app` and `timestamp` are required; every other
 /// field falls back to its default when absent.
 #[derive(Debug, Deserialize)]
 #[serde(rename_all = "camelCase")]
 struct PushPayload {
    /// Name of the reporting application; used as the key when merging stats.
    app: String,
    /// Identifier of the reporting instance; empty when omitted.
    #[serde(default)]
    instance: String,
    /// Timestamp supplied by the sender; passed to the stats store unchanged.
    timestamp: i64,
    /// Optional HTTP section of the report.
    #[serde(default)]
    http: Option<observability::push::HttpPayload>,
    /// Optional system section of the report.
    #[serde(default)]
    system: Option<observability::push::SystemPayload>,
    /// Business values keyed by name.
    #[serde(default)]
    business: HashMap<String, f64>,
    /// Optional token-usage section of the report.
    #[serde(default)]
    token_usage: Option<observability::push::TokenUsagePayload>,
    /// Optional task section of the report.
    #[serde(default)]
    tasks: Option<observability::push::TaskStatsPayload>,
    /// Latency snapshots keyed by name.
    #[serde(default)]
    latency: HashMap<String, observability::push::LatencySnapshot>,
    /// Log entries to store and, when Loki is configured, forward.
    #[serde(default)]
    logs: Vec<observability::push::LogEntry>,
 }
 /// Accepts a pushed stats payload, merges it into the stats store and
 /// forwards any attached log entries to Loki when a forwarder is configured.
 ///
 /// Always answers `200 ok`; a failed Loki push is only logged.
 async fn handle_push(
    stats_store: web::Data<StatsStore>,
    loki: web::Data<Option<Arc<LokiForwarder>>>,
    payload: web::Json<PushPayload>,
 ) -> HttpResponse {
    let report = payload.into_inner();
    stats_store::merge_push_payload(
        &stats_store,
        &report.app,
        &report.instance,
        report.timestamp,
        report.http.as_ref(),
        report.system.as_ref(),
        &report.business,
        report.token_usage.as_ref(),
        report.tasks.as_ref(),
        &report.latency,
        &report.logs,
    )
    .await;
    // Nothing to forward: done.
    if report.logs.is_empty() {
        return HttpResponse::Ok().body("ok");
    }
    if let Some(forwarder) = loki.as_ref() {
        let mut entries: Vec<LokiEntry> = Vec::with_capacity(report.logs.len());
        for log in &report.logs {
            // Log timestamps are interpreted as whole seconds; values that
            // cannot be represented fall back to the current time.
            let timestamp = match chrono::DateTime::from_timestamp(log.timestamp, 0) {
                Some(parsed) => parsed,
                None => chrono::Utc::now(),
            };
            entries.push(LokiEntry {
                timestamp,
                line: format!("[{}] {}", log.level.to_lowercase(), log.message),
            });
        }
        if let Err(e) = forwarder.push(entries).await {
            tracing::warn!(error = %e, "loki push on /push failed");
        }
    }
    HttpResponse::Ok().body("ok")
 }
 /// Periodically scrapes every known target and stores the parsed samples.
 ///
 /// Runs until a message arrives on `shutdown`. On each tick the current
 /// target list is snapshotted, targets not named in `scrape_apps_filter`
 /// (when a filter is given) are skipped, and the remaining targets are
 /// scraped one after another. Successful scrapes replace the target's entry
 /// in `store`; failures are counted and logged and leave the previous entry
 /// untouched.
 ///
 /// `interval_secs` is clamped to at least one second because
 /// `tokio::time::interval` panics on a zero period.
 async fn scrape_loop(
    targets: Arc<RwLock<Vec<ScrapeTarget>>>,
    store: MetricsStore,
    metrics: AggMetrics,
    http: HttpClient,
    interval_secs: u64,
    scrape_apps_filter: Option<Vec<String>>,
    _loki: Option<LokiForwarder>,
    mut shutdown: broadcast::Receiver<()>,
 ) {
    let mut ticker = interval(Duration::from_secs(interval_secs.max(1)));
    // Targets are scraped sequentially, so a slow cycle can overrun the
    // period. Delay the next tick instead of firing a burst of catch-up ticks.
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
    loop {
        tokio::select! {
            _ = shutdown.recv() => break,
            _ = ticker.tick() => {
                // Work on a snapshot so the lock is not held across scrapes.
                let targets_snapshot = targets.read().await.clone();
                metrics.targets_total.set(targets_snapshot.len() as f64);
                let mut healthy_count = 0u64;
                for target in &targets_snapshot {
                    if let Some(ref filter) = scrape_apps_filter {
                        if !filter.contains(&target.name) {
                            continue;
                        }
                    }
                    metrics.scrape_total.increment(1);
                    match http.scrape(target).await {
                        ScrapeResult::Success(body, duration_ms) => {
                            metrics.scrape_success.increment(1);
                            metrics.scrape_duration.record(duration_ms);
                            let parsed = scrape::parse_prometheus(&body);
                            update_store(store.clone(), &target.name, parsed).await;
                            healthy_count += 1;
                        }
                        ScrapeResult::Timeout => {
                            metrics.scrape_failures.increment(1);
                            metrics.scrape_errors_timeout.increment(1);
                            tracing::warn!(target = %target.name, "scrape timeout");
                        }
                        ScrapeResult::ConnectionError(e) => {
                            metrics.scrape_failures.increment(1);
                            metrics.scrape_errors_connection.increment(1);
                            tracing::warn!(target = %target.name, error = %e, "scrape connection error");
                        }
                        ScrapeResult::HttpError(status) => {
                            metrics.scrape_failures.increment(1);
                            tracing::warn!(target = %target.name, status = status, "scrape HTTP error");
                        }
                    }
                }
                metrics.targets_healthy.set(healthy_count as f64);
            }
        }
    }
 }
 /// Replaces the stored samples for `target_name` with `metrics`.
 async fn update_store(store: MetricsStore, target_name: &str, metrics: Vec<scrape::PromMetric>) {
    store
        .write()
        .await
        .insert(target_name.to_owned(), metrics);
 }
 /// Renders every scraped sample in Prometheus text format.
 ///
 /// Each sample keeps its original labels and additionally receives
 /// `aggregated_by="metrics-aggregator"`, `source_target=<target name>` and
 /// every pair in `extra_group_labels`; injected labels overwrite scraped
 /// labels of the same name. Labels are emitted in sorted order so the output
 /// is stable between requests.
 async fn render_aggregated_metrics(
    store: web::Data<MetricsStore>,
    extra_group_labels: Vec<(String, String)>,
 ) -> String {
    /// Escapes a label value: backslash, double quote and line feed.
    fn escape_label_value(raw: &str) -> String {
        raw.replace('\\', "\\\\")
            .replace('"', "\\\"")
            .replace('\n', "\\n")
    }

    let guard = store.read().await;
    let mut output = String::new();
    for (target_name, metrics) in guard.iter() {
        for metric in metrics {
            let mut labels = metric.labels.clone();
            labels.insert(
                "aggregated_by".to_string(),
                "metrics-aggregator".to_string(),
            );
            labels.insert("source_target".to_string(), target_name.clone());
            for (k, v) in &extra_group_labels {
                labels.insert(k.clone(), v.clone());
            }
            // At least two labels were just inserted, so the set is never empty.
            let mut pairs: Vec<String> = labels
                .iter()
                .map(|(k, v)| format!(r#"{}="{}""#, k, escape_label_value(v)))
                .collect();
            pairs.sort_unstable();
            let _ = writeln!(
                &mut output,
                "{}{{{}}} {}",
                metric.name,
                pairs.join(","),
                metric.value
            );
        }
    }
    output
 }
 /// Renders the per-app stats received via the push endpoint in Prometheus
 /// text format.
 ///
 /// For every app this emits, in order: HTTP totals, per-endpoint request
 /// counts, system gauges, business counters, AI token counters and
 /// per-endpoint latency figures.
 ///
 /// App names, endpoints and business counter names come from pushed
 /// payloads, so label values are escaped and business counter names are
 /// reduced to valid metric-name characters before being written.
 async fn render_pushed_metrics(stats_store: web::Data<StatsStore>) -> String {
    /// Escapes a label value: backslash, double quote and line feed.
    fn escape_label_value(raw: &str) -> String {
        raw.replace('\\', "\\\\")
            .replace('"', "\\\"")
            .replace('\n', "\\n")
    }

    /// Maps every character outside `[A-Za-z0-9_:]` to `_` and prefixes `_`
    /// when the result is empty or starts with a digit.
    fn sanitize_metric_name(raw: &str) -> String {
        let mut name: String = raw
            .chars()
            .map(|c| {
                if c.is_ascii_alphanumeric() || c == '_' || c == ':' {
                    c
                } else {
                    '_'
                }
            })
            .collect();
        if name.chars().next().map_or(true, |c| c.is_ascii_digit()) {
            name.insert(0, '_');
        }
        name
    }

    let guard = stats_store.read().await;
    let mut output = String::new();
    for (app_name, entry) in guard.iter() {
        let app = escape_label_value(app_name);
        // The previous label set rendered as
        // `{app="x",aggregated_by,metrics-aggregator,push_source=true}`,
        // which is not valid label syntax; every label is now `name="value"`.
        let push_labels = format!(
            r#"app="{}",aggregated_by="metrics-aggregator",push_source="true""#,
            app
        );
        // Label set shared by the system, business and AI series.
        let base_labels = format!(r#"app="{}",aggregated_by="metrics-aggregator""#, app);

        // HTTP totals
        let http_totals = [
            ("push_http_requests_total", entry.requests_total),
            (
                "push_http_request_duration_ms_total",
                entry.request_duration_ms_total,
            ),
            ("push_http_requests_2xx", entry.requests_2xx),
            ("push_http_requests_4xx", entry.requests_4xx),
            ("push_http_requests_5xx", entry.requests_5xx),
        ];
        for (metric, value) in http_totals {
            let _ = writeln!(&mut output, "{}{{{}}} {}", metric, push_labels, value);
        }
        for (endpoint, &count) in &entry.endpoints {
            let sanitized = endpoint.replace([' ', '/'], "_").to_lowercase();
            let _ = writeln!(
                &mut output,
                r#"push_http_endpoint_requests_total{{app="{}",endpoint="{}",aggregated_by="metrics-aggregator",push_source="true"}} {}"#,
                app,
                escape_label_value(&sanitized),
                count
            );
        }

        // System metrics
        let system: [(&str, &dyn std::fmt::Display); 4] = [
            ("system_cpu_usage_percent", &entry.cpu_usage_percent),
            ("system_memory_used_mb", &entry.memory_used_mb),
            ("system_memory_total_mb", &entry.memory_total_mb),
            ("system_uptime_secs", &entry.uptime_secs),
        ];
        for (metric, value) in system {
            let _ = writeln!(&mut output, "{}{{{}}} {}", metric, base_labels, value);
        }

        // Business counters
        for (counter_name, value) in &entry.business {
            let _ = writeln!(
                &mut output,
                "{}{{{}}} {}",
                sanitize_metric_name(counter_name),
                base_labels,
                value
            );
        }

        // Token usage
        let ai_totals = [
            ("ai_input_tokens_total", entry.ai_input_tokens_total),
            ("ai_output_tokens_total", entry.ai_output_tokens_total),
            ("ai_calls_total", entry.ai_calls_total),
        ];
        for (metric, value) in ai_totals {
            let _ = writeln!(&mut output, "{}{{{}}} {}", metric, base_labels, value);
        }

        // Latency per endpoint
        for (endpoint, lat) in &entry.latency {
            let lat_labels = format!(
                r#"app="{}",endpoint="{}",aggregated_by="metrics-aggregator""#,
                app,
                escape_label_value(endpoint)
            );
            let figures: [(&str, &dyn std::fmt::Display); 4] = [
                ("latency_p99_ms", &lat.p99_ms),
                ("latency_p90_ms", &lat.p90_ms),
                ("latency_p50_ms", &lat.p50_ms),
                ("latency_max_ms", &lat.max_ms),
            ];
            for (metric, value) in figures {
                let _ = writeln!(&mut output, "{}{{{}}} {}", metric, lat_labels, value);
            }
        }
    }
    output
 }
 // ── JSON API handlers ────────────────────────────────────────────────────────
 /// Returns the dashboard built from the stats store as JSON.
 ///
 /// If serialization fails the body is empty (the status is still 200).
 async fn handle_dashboard(stats_store: web::Data<StatsStore>) -> HttpResponse {
    let view = stats_store::build_dashboard(&stats_store).await;
    let body = match serde_json::to_string(&view) {
        Ok(serialized) => serialized,
        Err(_) => String::new(),
    };
    HttpResponse::Ok()
        .content_type("application/json")
        .body(body)
 }
 /// Returns the raw per-app stats map as JSON.
 ///
 /// If serialization fails the body is empty (the status is still 200).
 async fn handle_stats(stats_store: web::Data<StatsStore>) -> HttpResponse {
    let snapshot = stats_store.read().await;
    let body = match serde_json::to_string(&*snapshot) {
        Ok(serialized) => serialized,
        Err(_) => String::new(),
    };
    HttpResponse::Ok()
        .content_type("application/json")
        .body(body)
 }
 async fn log_collector(loki: Option<LokiForwarder>, mut shutdown: broadcast::Receiver<()>) {
    let stdin = tokio::io::stdin();
    let mut reader = tokio::io::BufReader::new(stdin);
    let mut interval_tick = interval(Duration::from_secs(1));
    let mut batch: Vec<LokiEntry> = Vec::with_capacity(100);
    let mut line_buf = String::new();
    loop {
        tokio::select! {
            _ = shutdown.recv() => break,
            _ = interval_tick.tick() => {
                if !batch.is_empty() {
                    if let Some(ref loki) = loki {
                        if let Err(e) = loki.push(std::mem::take(&mut batch)).await {
                            tracing::warn!(error = %e, "Loki push failed");
                        }
                    }
                }
            }
            _ = async { line_buf.clear(); reader.read_line(&mut line_buf).await.ok() } => {
                if !line_buf.is_empty() {
                    let line = line_buf.trim_end().to_string();
                    if !line.is_empty() {
                        batch.push(LokiEntry {
                            timestamp: chrono::Utc::now(),
                            line,
                        });
                        if batch.len() >= 100 {
                            if let Some(ref loki) = loki {
                                if let Err(e) = loki.push(std::mem::take(&mut batch)).await {
                                    tracing::warn!(error = %e, "Loki push failed");
                                }
                            }
                        }
                    }
                }
            }
        }
    }
 }
--- a/apps/metrics/src/metrics.rs
+++ b/apps/metrics/src/metrics.rs
@ -1,101 +0,0 @@
 use metrics::{
    Counter, Gauge, Histogram, Unit, describe_counter, describe_gauge, describe_histogram,
 };
 /// Registers the unit and description of every aggregator metric with the
 /// installed metrics recorder.
 ///
 /// The names here must match the ones used to create the handles in
 /// `AggMetrics::default`.
 pub fn init() {
    // Gauges: current target counts.
    describe_gauge!(
        "aggregator_targets_total",
        Unit::Count,
        "Total number of scrape targets known to the aggregator"
    );
    describe_gauge!(
        "aggregator_targets_healthy",
        Unit::Count,
        "Number of scrape targets that responded last scrape"
    );
    // Counters: scrape attempts and outcomes.
    describe_counter!(
        "aggregator_scrape_total",
        Unit::Count,
        "Total number of scrape attempts"
    );
    describe_counter!(
        "aggregator_scrape_success",
        Unit::Count,
        "Successful scrapes"
    );
    describe_counter!(
        "aggregator_scrape_failures",
        Unit::Count,
        "Failed scrape attempts"
    );
    // Counters: failures broken down by cause.
    describe_counter!(
        "aggregator_scrape_errors_parse",
        Unit::Count,
        "Scrape failures due to parse errors"
    );
    describe_counter!(
        "aggregator_scrape_errors_timeout",
        Unit::Count,
        "Scrape failures due to timeout"
    );
    describe_counter!(
        "aggregator_scrape_errors_connection",
        Unit::Count,
        "Scrape failures due to connection errors"
    );
    // Counters: target churn.
    describe_counter!(
        "aggregator_targets_discovered",
        Unit::Count,
        "Total targets discovered"
    );
    describe_counter!(
        "aggregator_targets_lost",
        Unit::Count,
        "Total targets that disappeared"
    );
    // Histogram: per-scrape duration.
    describe_histogram!(
        "aggregator_scrape_duration_ms",
        Unit::Milliseconds,
        "Scrape duration in milliseconds"
    );
 }
 /// Handles to the aggregator's own metrics, created once and cloned into the
 /// tasks that update them.
 // NOTE(review): `dead_code` is allowed presumably because some handles
 // (e.g. `scrape_errors_parse`, `targets_discovered`, `targets_lost`) are not
 // updated anywhere in the scrape loop — confirm before removing the attribute.
 #[derive(Clone)]
 #[allow(dead_code)]
 pub struct AggMetrics {
    /// Gauge `aggregator_targets_total`.
    pub targets_total: Gauge,
    /// Gauge `aggregator_targets_healthy`.
    pub targets_healthy: Gauge,
    /// Counter `aggregator_scrape_total`.
    pub scrape_total: Counter,
    /// Counter `aggregator_scrape_success`.
    pub scrape_success: Counter,
    /// Counter `aggregator_scrape_failures`.
    pub scrape_failures: Counter,
    /// Counter `aggregator_scrape_errors_parse`.
    pub scrape_errors_parse: Counter,
    /// Counter `aggregator_scrape_errors_timeout`.
    pub scrape_errors_timeout: Counter,
    /// Counter `aggregator_scrape_errors_connection`.
    pub scrape_errors_connection: Counter,
    /// Counter `aggregator_targets_discovered`.
    pub targets_discovered: Counter,
    /// Counter `aggregator_targets_lost`.
    pub targets_lost: Counter,
    /// Histogram `aggregator_scrape_duration_ms`.
    pub scrape_duration: Histogram,
 }
 impl Default for AggMetrics {
    fn default() -> Self {
        Self {
            targets_total: metrics::gauge!("aggregator_targets_total"),
            targets_healthy: metrics::gauge!("aggregator_targets_healthy"),
            scrape_total: metrics::counter!("aggregator_scrape_total"),
            scrape_success: metrics::counter!("aggregator_scrape_success"),
            scrape_failures: metrics::counter!("aggregator_scrape_failures"),
            scrape_errors_parse: metrics::counter!("aggregator_scrape_errors_parse"),
            scrape_errors_timeout: metrics::counter!("aggregator_scrape_errors_timeout"),
            scrape_errors_connection: metrics::counter!("aggregator_scrape_errors_connection"),
            targets_discovered: metrics::counter!("aggregator_targets_discovered"),
            targets_lost: metrics::counter!("aggregator_targets_lost"),
            scrape_duration: metrics::histogram!("aggregator_scrape_duration_ms"),
        }
    }
 }
 impl AggMetrics {
    /// Creates the metric handles; equivalent to `AggMetrics::default()`.
    pub fn new() -> Self {
        Self::default()
    }
 }
--- a/apps/metrics/src/otel.rs
+++ b/apps/metrics/src/otel.rs
@ -1,42 +0,0 @@
 use anyhow::Context;
 use opentelemetry::trace::TracerProvider;
 use opentelemetry_otlp::{SpanExporter, WithExportConfig};
 use opentelemetry_sdk::trace as sdktrace;
 use tracing_opentelemetry::layer;
 use tracing_subscriber::prelude::*;
 /// Owns the tracer provider created by `init_otel` so it can be shut down
 /// explicitly via `OtelGuard::shutdown`.
 pub struct OtelGuard {
    /// Provider returned by the SDK builder in `init_otel`.
    provider: sdktrace::SdkTracerProvider,
 }
 impl OtelGuard {
    /// Shuts the tracer provider down, consuming the guard.
    ///
    /// A shutdown failure is logged as a warning and otherwise ignored.
    pub async fn shutdown(self) {
        match self.provider.shutdown() {
            Ok(_) => {}
            Err(e) => {
                tracing::warn!(error = %e, "OTLP shutdown error");
            }
        }
    }
 }
 /// Builds an OTLP span exporter (HTTP transport) for `endpoint`, wraps it in
 /// a batching tracer provider and installs a tracing subscriber that sends
 /// spans through it.
 ///
 /// `service_name` is used as the name of the tracer obtained from the
 /// provider.
 ///
 /// # Errors
 /// Fails when the exporter cannot be built or when the subscriber cannot be
 /// installed.
 ///
 /// NOTE(review): `try_init` errors if a global subscriber is already set;
 /// confirm this runs before any other subscriber is installed.
 pub fn init_otel(endpoint: &str, service_name: &str) -> anyhow::Result<OtelGuard> {
    let span_exporter = SpanExporter::builder()
        .with_http()
        .with_endpoint(endpoint)
        .build()
        .context("build OTLP exporter")?;
    let provider = sdktrace::SdkTracerProvider::builder()
        .with_batch_exporter(span_exporter)
        .build();
    let otel_layer = layer().with_tracer(provider.tracer(service_name.to_string()));
    let install = tracing_subscriber::registry().with(otel_layer).try_init();
    install.context("install OTLP tracing subscriber")?;
    Ok(OtelGuard { provider })
 }
--- a/apps/metrics/src/scrape.rs
+++ b/apps/metrics/src/scrape.rs
@ -1,135 +0,0 @@
 use awc::Client;
 use std::collections::HashMap;
 use crate::target::ScrapeTarget;
 /// Thin wrapper around the `awc` client used to scrape targets.
 #[derive(Clone)]
 pub struct HttpClient {
    /// Underlying client, built in `new` with the configured timeout.
    client: Client,
 }
 impl HttpClient {
    /// Creates a client whose requests time out after `timeout_secs` seconds.
    pub fn new(timeout_secs: u64) -> Self {
        let client = Client::builder()
            .timeout(std::time::Duration::from_secs(timeout_secs))
            .finish();
        Self { client }
    }

    /// Fetches `target.url()` and classifies the outcome.
    ///
    /// Returns `Success(body, elapsed_ms)` for a 2xx response whose body was
    /// read completely (invalid UTF-8 is replaced lossily), `HttpError` for
    /// any other status, `Timeout` when the send error message mentions a
    /// timeout, and `ConnectionError` for every other send or body-read
    /// failure.
    pub async fn scrape(&self, target: &ScrapeTarget) -> ScrapeResult {
        let start = std::time::Instant::now();
        let url = target.url();
        let mut resp = match self.client.get(url).send().await {
            Ok(resp) => resp,
            Err(e) => {
                let msg = e.to_string();
                // Match case-insensitively: the previous lowercase-only
                // `timeout` check missed messages that begin with a capital
                // letter (e.g. "Timeout while ...") and counted them as
                // connection errors.
                let lowered = msg.to_ascii_lowercase();
                let timed_out = ["timeout", "timed out", "timedout"]
                    .iter()
                    .any(|needle| lowered.contains(needle));
                return if timed_out {
                    ScrapeResult::Timeout
                } else {
                    ScrapeResult::ConnectionError(msg)
                };
            }
        };
        if !resp.status().is_success() {
            return ScrapeResult::HttpError(resp.status().as_u16());
        }
        let body = match resp.body().await {
            Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
            Err(e) => return ScrapeResult::ConnectionError(e.to_string()),
        };
        let scrape_ms = start.elapsed().as_millis() as f64;
        ScrapeResult::Success(body, scrape_ms)
    }
 }
 /// Outcome of a single scrape attempt.
 pub enum ScrapeResult {
    /// Response body and the elapsed time of the scrape in milliseconds.
    Success(String, f64),
    /// The request failed with an error classified as a timeout.
    Timeout,
    /// Any other failure while sending the request or reading the body.
    ConnectionError(String),
    /// The target answered with a non-success HTTP status code.
    HttpError(u16),
 }
 /// One sample parsed from Prometheus text exposition output.
 #[derive(Clone, Debug)]
 pub struct PromMetric {
    /// Metric name (the part before `{` or before the value).
    pub name: String,
    /// Sample value.
    pub value: f64,
    /// Label set; empty when the sample has no `{...}` section.
    pub labels: HashMap<String, String>,
 }

 /// Parses Prometheus text exposition output into samples.
 ///
 /// Blank lines, comment lines (starting with `#`) and lines that cannot be
 /// parsed are skipped; malformed input never panics. A trailing timestamp
 /// after the value is ignored.
 pub fn parse_prometheus(body: &str) -> Vec<PromMetric> {
    body.lines().filter_map(parse_sample_line).collect()
 }

 /// Parses one exposition line, returning `None` for anything that is not a
 /// well-formed sample.
 fn parse_sample_line(line: &str) -> Option<PromMetric> {
    let line = line.trim();
    if line.is_empty() || line.starts_with('#') {
        return None;
    }
    let (name, labels, rest) = match line.find('{') {
        // A label section only counts when `{` belongs to the series token,
        // i.e. no whitespace precedes it.
        Some(open) if !line[..open].contains(char::is_whitespace) => {
            // Only the numeric value and an optional timestamp follow the
            // label section, so the last `}` closes it. This keeps label
            // values containing spaces or braces intact.
            let close = open + line[open..].rfind('}')?;
            (
                &line[..open],
                parse_labels(&line[open + 1..close]),
                &line[close + 1..],
            )
        }
        _ => {
            let split = line.find(char::is_whitespace)?;
            (&line[..split], HashMap::new(), &line[split..])
        }
    };
    let value: f64 = rest.split_whitespace().next()?.parse().ok()?;
    Some(PromMetric {
        name: name.to_string(),
        value,
        labels,
    })
 }

 /// Parses the inside of a `{...}` label section into a map.
 ///
 /// Values may be double-quoted (with `\\`, `\"` and `\n` escapes),
 /// single-quoted, or bare (ending at the first character that is not
 /// alphanumeric, `_` or `-`). An unterminated quoted value extends to the
 /// end of the input instead of panicking.
 pub fn parse_labels(s: &str) -> HashMap<String, String> {
    let mut labels = HashMap::new();
    let mut remaining = s;
    while let Some(eq) = remaining.find('=') {
        let key = remaining[..eq].trim().to_string();
        let after_eq = remaining[eq + 1..].trim_start();
        let (value, rest) = match after_eq.chars().next() {
            Some(quote @ ('"' | '\'')) => split_quoted_value(&after_eq[1..], quote),
            _ => {
                let end = after_eq
                    .find(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
                    .unwrap_or(after_eq.len());
                (after_eq[..end].to_string(), &after_eq[end..])
            }
        };
        labels.insert(key, value);
        // Skip the separator and any surrounding whitespace before the next key.
        remaining = rest.trim_start_matches(|c: char| c == ',' || c.is_whitespace());
    }
    labels
 }

 /// Reads a quoted value whose opening `quote` has already been consumed.
 ///
 /// Returns the unescaped value and the text following the closing quote
 /// (empty when the value is unterminated).
 fn split_quoted_value(s: &str, quote: char) -> (String, &str) {
    let mut value = String::with_capacity(s.len());
    let mut chars = s.char_indices();
    while let Some((idx, c)) = chars.next() {
        match c {
            '\\' => match chars.next() {
                Some((_, 'n')) => value.push('\n'),
                Some((_, escaped)) if escaped == '\\' || escaped == quote => value.push(escaped),
                // Unknown escape: keep it verbatim.
                Some((_, other)) => {
                    value.push('\\');
                    value.push(other);
                }
                None => value.push('\\'),
            },
            c if c == quote => return (value, &s[idx + c.len_utf8()..]),
            c => value.push(c),
        }
    }
    (value, "")
 }
--- a/apps/metrics/src/stats_store.rs
+++ b/apps/metrics/src/stats_store.rs
@ -1,217 +0,0 @@
 //! Stats store: receives expanded push payloads from all apps,
 //! aggregates over time, computes derived statistics (p99 etc),
 //! and provides JSON API for external consumption.
 use serde::Serialize;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;
 /// Stats entry kept for one app name.
 ///
 /// `merge_push_payload` keys the store by app name only (its instance argument
 /// is unused), so every instance of an app writes into the same entry. Unless
 /// noted otherwise, each field holds the value from the most recent push that
 /// carried the corresponding payload section.
 #[derive(Debug, Clone, Default, Serialize)]
 pub struct AppStats {
    /// Timestamp passed with the most recently merged push.
    pub last_seen: i64,
    /// Number of pushes merged into this entry.
    pub sample_count: u64,
    // ── HTTP ─────────────────────────────────────────────────────
    // Overwritten from the push's HTTP payload when one is present.
    pub requests_total: u64,
    pub request_duration_ms_total: u64,
    pub requests_2xx: u64,
    pub requests_4xx: u64,
    pub requests_5xx: u64,
    /// Per-endpoint value from the HTTP payload. Keys seen in an earlier push
    /// but missing from a later one keep their previous value.
    pub endpoints: HashMap<String, u64>,
    // ── System ───────────────────────────────────────────────────
    // Overwritten from the push's system payload when one is present.
    pub cpu_usage_percent: f32,
    pub memory_used_mb: u64,
    pub memory_total_mb: u64,
    pub uptime_secs: u64,
    // ── Business counters ────────────────────────────────────────
    /// Replaced wholesale by the business map of every push.
    pub business: HashMap<String, f64>,
    // ── Token usage ──────────────────────────────────────────────
    // Overwritten from the push's token-usage payload when one is present.
    pub ai_input_tokens_total: i64,
    pub ai_output_tokens_total: i64,
    pub ai_calls_total: i64,
    pub ai_calls_success: i64,
    pub ai_calls_failure: i64,
    /// Per-model token stats; each pushed model overwrites its own entry.
    pub token_by_model: HashMap<String, ModelTokenStats>,
    // ── Tasks ────────────────────────────────────────────────────
    // Overwritten from the push's task payload when one is present.
    pub tasks_queued: i64,
    pub tasks_running: i64,
    pub tasks_completed: i64,
    pub tasks_failed: i64,
    // ── Latency ──────────────────────────────────────────────────
    /// Latest latency snapshot per key of the pushed latency map.
    pub latency: HashMap<String, LatencyStats>,
    // ── Logs ─────────────────────────────────────────────────────
    /// `(timestamp, "[level] message")` pairs; never serialized. Entries older
    /// than the retention window applied in `merge_push_payload` are dropped.
    #[serde(skip_serializing)]
    pub logs: Vec<(i64, String)>,
 }
 /// Token usage recorded for a single model key, copied field-by-field from the
 /// per-model section of the latest token-usage payload.
 #[derive(Debug, Clone, Default, Serialize)]
 pub struct ModelTokenStats {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub calls: i64,
 }
 /// Latency summary for one key of the pushed latency map.
 ///
 /// All fields are copied verbatim from the pushed snapshot; nothing is
 /// recomputed here. NOTE(review): the names suggest percentiles in
 /// milliseconds — confirm against the producer.
 #[derive(Debug, Clone, Default, Serialize)]
 pub struct LatencyStats {
    pub p50_ms: f64,
    pub p90_ms: f64,
    pub p99_ms: f64,
    pub max_ms: f64,
    pub count: u64,
 }
 /// The shared stats store: app name → [`AppStats`], behind an async
 /// read/write lock so it can be cloned cheaply and shared between tasks.
 pub type StatsStore = Arc<RwLock<HashMap<String, AppStats>>>;
 /// Merge a new push payload into the stats store.
 ///
 /// The entry is looked up (or created) by `app` alone; `_instance` is accepted
 /// but unused, so all instances of an app share one entry and the last push
 /// wins for every overwritten field.
 ///
 /// Merge rules:
 /// - `last_seen` is set to `timestamp` and `sample_count` is incremented.
 /// - `http`, `system`, `token_usage`, `tasks`: when `Some`, the stored fields
 ///   are overwritten with the pushed values; when `None`, they are left as is.
 /// - `business`: the stored map is replaced by a clone of the pushed map.
 /// - `latency`: each pushed key overwrites its stored snapshot.
 /// - `logs`: appended, then trimmed to a 300-second window.
 ///
 /// Holds the store's write lock for the duration of the merge.
 pub async fn merge_push_payload(
    store: &StatsStore,
    app: &str,
    _instance: &str,
    timestamp: i64,
    http: Option<&observability::push::HttpPayload>,
    system: Option<&observability::push::SystemPayload>,
    business: &HashMap<String, f64>,
    token_usage: Option<&observability::push::TokenUsagePayload>,
    tasks: Option<&observability::push::TaskStatsPayload>,
    latency: &HashMap<String, observability::push::LatencySnapshot>,
    logs: &[observability::push::LogEntry],
 ) {
    // Keyed by app name only, so pushes from different instances land in the
    // same entry.
    let mut guard = store.write().await;
    let entry = guard.entry(app.to_string()).or_default();
    entry.last_seen = timestamp;
    entry.sample_count += 1;
    // HTTP — overwrite with the pushed values (nothing is summed here).
    // NOTE(review): with several instances per app this keeps only the last
    // pusher's numbers — confirm that is intended.
    if let Some(http) = http {
        entry.requests_total = http.requests_total;
        entry.request_duration_ms_total = http.request_duration_ms_total;
        entry.requests_2xx = http.requests_2xx;
        entry.requests_4xx = http.requests_4xx;
        entry.requests_5xx = http.requests_5xx;
        // Endpoints missing from this push keep their previous value.
        for (ep, count) in &http.endpoints {
            entry.endpoints.insert(ep.clone(), *count);
        }
    }
    // System — overwrite with the current snapshot.
    if let Some(sys) = system {
        entry.cpu_usage_percent = sys.cpu_usage_percent;
        entry.memory_used_mb = sys.memory_used_mb;
        entry.memory_total_mb = sys.memory_total_mb;
        entry.uptime_secs = sys.uptime_secs;
    }
    // Business — replace the whole map with the latest snapshot.
    entry.business = business.clone();
    // Token usage — overwrite with the latest values.
    if let Some(tu) = token_usage {
        entry.ai_input_tokens_total = tu.ai_input_tokens_total;
        entry.ai_output_tokens_total = tu.ai_output_tokens_total;
        entry.ai_calls_total = tu.ai_calls_total;
        entry.ai_calls_success = tu.ai_calls_success;
        entry.ai_calls_failure = tu.ai_calls_failure;
        for (model, usage) in &tu.by_model {
            let ms = entry.token_by_model.entry(model.clone()).or_default();
            ms.input_tokens = usage.input_tokens;
            ms.output_tokens = usage.output_tokens;
            ms.calls = usage.calls;
        }
    }
    // Tasks — overwrite with the latest values.
    if let Some(t) = tasks {
        entry.tasks_queued = t.queued;
        entry.tasks_running = t.running;
        entry.tasks_completed = t.completed;
        entry.tasks_failed = t.failed;
    }
    // Latency — overwrite each pushed key with its latest snapshot.
    for (endpoint, snap) in latency {
        let ls = entry.latency.entry(endpoint.clone()).or_default();
        ls.p50_ms = snap.p50_ms;
        ls.p90_ms = snap.p90_ms;
        ls.p99_ms = snap.p99_ms;
        ls.max_ms = snap.max_ms;
        ls.count = snap.count;
    }
    // Logs — append, then keep only entries from the last 300 seconds. The
    // window is measured against the current UTC time, not `timestamp`, and
    // there is no cap on the number of lines.
    // NOTE(review): assumes `LogEntry::timestamp` is Unix seconds — confirm.
    entry.logs.extend(logs.iter().map(|log| {
        (
            log.timestamp,
            format!("[{}] {}", log.level.to_lowercase(), log.message),
        )
    }));
    let cutoff = chrono::Utc::now().timestamp() - 300;
    entry.logs.retain(|(ts, _)| *ts >= cutoff);
 }
 /// Dashboard response combining all apps' stats.
 #[derive(Debug, Serialize)]
 pub struct DashboardResponse {
    /// Unix timestamp (seconds, UTC) at which this response was built.
    pub timestamp: i64,
    /// Number of entries in the store, i.e. distinct app names (instances of
    /// the same app are not counted separately).
    pub app_count: u64,
    /// Copy of every app's stats entry, keyed by app name.
    pub apps: HashMap<String, AppStats>,
    /// Derived: unweighted mean of `p99_ms` over every latency entry of every
    /// app; `0.0` when there are no latency entries.
    pub avg_p99_ms: f64,
    /// Derived: sum of `ai_input_tokens_total` across all apps.
    pub total_input_tokens: i64,
    /// Derived: sum of `ai_output_tokens_total` across all apps.
    pub total_output_tokens: i64,
    /// Derived: sum of `ai_calls_total` across all apps.
    pub total_ai_calls: i64,
 }
 /// Build the dashboard response from the stats store.
 pub async fn build_dashboard(store: &StatsStore) -> DashboardResponse {
    let guard = store.read().await;
    let mut avg_p99 = 0.0;
    let mut p99_count = 0;
    let mut total_input = 0i64;
    let mut total_output = 0i64;
    let mut total_calls = 0i64;
    for (_, stats) in guard.iter() {
        total_input += stats.ai_input_tokens_total;
        total_output += stats.ai_output_tokens_total;
        total_calls += stats.ai_calls_total;
        for (_, lat) in &stats.latency {
            avg_p99 += lat.p99_ms;
            p99_count += 1;
        }
    }
    let avg_p99_ms = if p99_count > 0 {
        avg_p99 / p99_count as f64
    } else {
        0.0
    };
    DashboardResponse {
        timestamp: chrono::Utc::now().timestamp(),
        app_count: guard.len() as u64,
        apps: guard.clone(),
        avg_p99_ms,
        total_input_tokens: total_input,
        total_output_tokens: total_output,
        total_ai_calls: total_calls,
    }
 }
--- a/apps/metrics/src/target.rs
+++ b/apps/metrics/src/target.rs
@ -1,36 +0,0 @@
 use anyhow::Context;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 /// One scrape target as read from the targets file.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct ScrapeTarget {
    /// Name of the target.
    pub name: String,
    /// Address placed after `http://` when building the scrape URL.
    pub addr: String,
    /// Path appended to `addr`, or a complete URL when it starts with `http`.
    /// Defaults to `/metrics` when missing from the input.
    #[serde(default = "default_metrics_path")]
    pub metrics_path: String,
    /// Labels attached to this target; empty when missing from the input.
    #[serde(default)]
    pub labels: HashMap<String, String>,
 }
 /// Serde default for `ScrapeTarget::metrics_path`.
 fn default_metrics_path() -> String {
    String::from("/metrics")
 }
 impl ScrapeTarget {
    /// Returns the URL to scrape.
    ///
    /// A `metrics_path` beginning with `http` is returned unchanged; anything
    /// else is appended to `http://{addr}`.
    pub fn url(&self) -> String {
        match self.metrics_path.as_str() {
            full_url if full_url.starts_with("http") => full_url.to_owned(),
            path => format!("http://{}{}", self.addr, path),
        }
    }
 }
 /// Reads `path` and deserializes its contents as a JSON array of
 /// [`ScrapeTarget`]s.
 ///
 /// # Errors
 /// Fails with context `read targets file` when the file cannot be read, and
 /// with `parse targets file {path}` when the contents are not valid JSON for
 /// a list of targets.
 pub async fn load_targets_from_file(path: &str) -> anyhow::Result<Vec<ScrapeTarget>> {
    let raw = tokio::fs::read_to_string(path)
        .await
        .context("read targets file")?;
    serde_json::from_str::<Vec<ScrapeTarget>>(&raw)
        .with_context(|| format!("parse targets file {path}"))
 }
--- a/apps/migrate/Cargo.toml
+++ b/apps/migrate/Cargo.toml
@ -1,13 +0,0 @@
 [package]
 name = "migrate-cli"
 version.workspace = true
 edition.workspace = true
 [dependencies]
 migrate.workspace = true
 sea-orm = { workspace = true, features = ["sqlx-all", "runtime-tokio"] }
 tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
 anyhow.workspace = true
 clap.workspace = true
 dotenvy.workspace = true
 config = { workspace = true }
--- a/apps/migrate/src/main.rs
+++ b/apps/migrate/src/main.rs
@ -1,102 +0,0 @@
 use anyhow::Context;
 use clap::Command;
 use migrate::MigratorTrait;
 use sea_orm::{Database, DatabaseConnection};
 /// Entry point of the migration CLI.
 ///
 /// Loads `.env` (best effort), parses the command line, connects to the
 /// database named by the app configuration and runs the selected subcommand.
 /// The optional positional `steps` argument is declared on the top-level
 /// command, so it is read from the top-level matches for `up` and `down`.
 ///
 /// # Errors
 /// Returns an error when argument parsing fails, when `steps` is present but is
 /// not a valid non-negative integer, when the database URL cannot be obtained,
 /// when the connection fails, or when the migration itself fails. With no
 /// recognised subcommand, a usage message is printed and the process exits
 /// with status 1.
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    dotenvy::dotenv().ok();
    let matches = Command::new("migrate")
        .about("Database migration CLI")
        .arg(
            clap::Arg::new("steps")
                .help("Number of migrations (for up/down)")
                .required(false)
                .index(1),
        )
        .subcommand(Command::new("up").about("Apply pending migrations"))
        .subcommand(Command::new("down").about("Revert applied migrations"))
        .subcommand(Command::new("fresh").about("Drop all tables and re-apply"))
        .subcommand(Command::new("refresh").about("Revert all then re-apply"))
        .subcommand(Command::new("reset").about("Revert all applied migrations"))
        .subcommand(Command::new("status").about("Show migration status"))
        .try_get_matches()
        .map_err(|e| anyhow::anyhow!("{}", e))?;
    let db_url = config::AppConfig::load().database_url()?;
    let db: DatabaseConnection = Database::connect(&db_url).await?;
    match matches.subcommand_name() {
        // `up` defaults to 0 steps, which `run_up` treats as "no limit".
        Some("up") => run_up(&db, steps_arg(&matches, 0)?).await?,
        // `down` defaults to reverting a single migration.
        Some("down") => run_down(&db, steps_arg(&matches, 1)?).await?,
        Some("fresh") => run_fresh(&db).await?,
        Some("refresh") => run_refresh(&db).await?,
        Some("reset") => run_reset(&db).await?,
        Some("status") => run_status(&db).await?,
        _ => {
            eprintln!(
                "Usage: migrate <command>\nCommands: up, down, fresh, refresh, reset, status"
            );
            std::process::exit(1);
        }
    }
    Ok(())
 }
 /// Reads the optional `steps` argument, falling back to `default` when absent.
 ///
 /// A value that is present but not a valid `u32` is reported as an error rather
 /// than being replaced by the default, so a typo cannot trigger an unintended
 /// number of migrations.
 fn steps_arg(matches: &clap::ArgMatches, default: u32) -> anyhow::Result<u32> {
    match matches.get_one::<String>("steps") {
        Some(raw) => raw
            .parse::<u32>()
            .with_context(|| format!("invalid steps value {raw:?}: expected a non-negative integer")),
        None => Ok(default),
    }
 }
 /// Applies pending migrations; `steps == 0` means no limit.
 async fn run_up(db: &DatabaseConnection, steps: u32) -> anyhow::Result<()> {
    let limit = (steps != 0).then_some(steps);
    migrate::Migrator::up(db, limit)
        .await
        .context("failed to run migrations up")
 }
 /// Reverts `steps` applied migrations.
 async fn run_down(db: &DatabaseConnection, steps: u32) -> anyhow::Result<()> {
    let limit = Some(steps);
    migrate::Migrator::down(db, limit)
        .await
        .context("failed to run migrations down")
 }
 /// Runs the migrator's `fresh` operation against `db`.
 async fn run_fresh(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::fresh(db).await;
    outcome.context("failed to run migrations fresh")
 }
 /// Runs the migrator's `refresh` operation against `db`.
 async fn run_refresh(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::refresh(db).await;
    outcome.context("failed to run migrations refresh")
 }
 /// Runs the migrator's `reset` operation against `db`.
 async fn run_reset(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::reset(db).await;
    outcome.context("failed to run migrations reset")
 }
 /// Runs the migrator's `status` operation against `db`.
 async fn run_status(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::status(db).await;
    outcome.context("failed to get migration status")
 }
--- a/apps/static/Cargo.toml
+++ b/apps/static/Cargo.toml
@ -1,21 +0,0 @@
 [package]
 name = "static-server"
 version.workspace = true
 edition.workspace = true
 [dependencies]
 actix-web = { workspace = true }
 actix-files = { workspace = true }
 actix-cors = { workspace = true }
 observability = { workspace = true }
 metrics-exporter-prometheus = "0.13"
 tokio = { workspace = true, features = ["full"] }
 futures = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 mime = { workspace = true }
 mime_guess2 = { workspace = true }
 slog = { workspace = true }
 anyhow = { workspace = true }
 env_logger = { workspace = true }
 log = "0.4"
--- a/apps/static/src/main.rs
+++ b/apps/static/src/main.rs
@ -1,212 +0,0 @@
 use actix_cors::Cors;
 use actix_files::Files;
 use actix_web::dev::{Service, ServiceRequest, ServiceResponse};
 use actix_web::{App, HttpResponse, HttpServer, http::header, web};
 use futures::future::LocalBoxFuture;
 use log::info;
 use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::time::Instant;
 /// Runtime configuration of the static file server, which serves avatar,
 /// blob, media and other static files from sub-directories of `root`.
 #[derive(Clone)]
 struct StaticConfig {
    /// Base directory; read from `STATIC_ROOT`, defaulting to `/data`.
    root: PathBuf,
    /// Read from `STATIC_CORS`; true only for the values `true` and `1`
    /// (the variable defaults to `true` when unset).
    cors_enabled: bool,
 }
 impl StaticConfig {
    /// Builds the configuration from the environment.
    ///
    /// `STATIC_ROOT` defaults to `/data`. `STATIC_CORS` defaults to `true` and
    /// enables CORS only when it is exactly `true` or `1`.
    fn from_env() -> Self {
        let root = std::env::var("STATIC_ROOT").unwrap_or_else(|_| "/data".to_string());
        let cors = std::env::var("STATIC_CORS").unwrap_or_else(|_| "true".to_string());
        Self {
            root: PathBuf::from(root),
            cors_enabled: matches!(cors.as_str(), "true" | "1"),
        }
    }
    /// Returns `root/name`, creating the directory (and any missing parents)
    /// first.
    ///
    /// Creation stays best-effort — the path is returned either way — but a
    /// failure is now reported on stderr instead of being silently discarded,
    /// so a read-only or mis-mounted root is visible at startup.
    fn ensure_dir(&self, name: &str) -> PathBuf {
        let dir = self.root.join(name);
        // `create_dir_all` succeeds when the directory already exists, so no
        // separate existence check is needed.
        if let Err(err) = std::fs::create_dir_all(&dir) {
            eprintln!("warning: could not create directory {:?}: {}", dir, err);
        }
        dir
    }
 }
 /// Health endpoint: responds `200 OK` with a fixed JSON body.
 async fn health() -> HttpResponse {
    let body = serde_json::json!({
        "status": "ok",
        "service": "static-server"
    });
    HttpResponse::Ok().json(body)
 }
 /// Middleware factory for request logging.
 ///
 /// The service it produces logs method, path, status code and elapsed time of
 /// each successful response, except for noisy paths (`/health`, `/metrics`,
 /// and anything under `/ws`, `/avatar`, `/blob`, `/media` or `/static`).
 struct RequestLogger;
 /// Wraps the next service in a [`RequestLoggerService`]; construction cannot
 /// fail, so the returned future is immediately ready.
 impl<S, B> actix_web::dev::Transform<S, ServiceRequest> for RequestLogger
 where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
    B: 'static,
 {
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Transform = RequestLoggerService<S>;
    type InitError = ();
    type Future = futures::future::Ready<Result<Self::Transform, Self::InitError>>;
    fn new_transform(&self, service: S) -> Self::Future {
        let wrapped = RequestLoggerService {
            service,
            _marker: std::marker::PhantomData,
        };
        futures::future::ready(Ok(wrapped))
    }
 }
 /// Service produced by [`RequestLogger`]: forwards every request to the
 /// wrapped `service` and logs the outcome of non-noisy paths.
 struct RequestLoggerService<S> {
    /// The inner service that actually handles the request.
    service: S,
    /// Zero-sized marker; holds no data.
    _marker: std::marker::PhantomData<fn(ServiceRequest)>,
 }
 /// Forwards requests to the inner service, timing them and logging the result
 /// unless the path is one of the noisy ones.
 impl<S, B> Service<ServiceRequest> for RequestLoggerService<S>
 where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
    B: 'static,
 {
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
    /// Readiness is delegated to the inner service.
    fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.service.poll_ready(cx)
    }
    /// Calls the inner service and, for paths that are not silenced, logs
    /// `method path status elapsed` once a response is available. Errors from
    /// the inner service are propagated without logging.
    fn call(&self, req: ServiceRequest) -> Self::Future {
        // Prefixes whose requests are never logged.
        const QUIET_PREFIXES: [&str; 5] = ["/ws", "/avatar", "/blob", "/media", "/static"];
        let path = req.path().to_string();
        let method = req.method().to_string();
        let is_quiet = path == "/health"
            || path == "/metrics"
            || QUIET_PREFIXES
                .iter()
                .any(|prefix| path.starts_with(*prefix));
        let started = Instant::now();
        let pending = self.service.call(req);
        Box::pin(async move {
            let response = pending.await?;
            if !is_quiet {
                info!(
                    target: "static_server",
                    "{} {} {} {:?}",
                    method,
                    path,
                    response.status().as_u16(),
                    started.elapsed()
                );
            }
            Ok(response)
        })
    }
 }
 /// Entry point of the static file server.
 ///
 /// Initialises tracing and the metrics recorder, optionally starts the metrics
 /// pusher (when `METRICS_PUSH_URL` is set), makes sure the served directories
 /// exist, and then runs an HTTP server on `STATIC_BIND` (default
 /// `0.0.0.0:8081`) exposing `/health` plus the `/avatar`, `/blob`, `/media`
 /// and `/static` file trees.
 ///
 /// # Errors
 /// Returns an error when binding the listen address fails or the server stops
 /// with an error.
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
    init_tracing_subscriber("info", false);
    let prometheus_handle = Arc::new(install_recorder());
    let http_metrics = Arc::new(HttpMetrics::new());
    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Ok(push_url) = std::env::var("METRICS_PUSH_URL") {
        let pusher = MetricsPusher::new(&push_url, "static");
        pusher.spawn(
            http_metrics.clone(),
            prometheus_handle.clone(),
            std::time::Duration::from_secs(15),
        );
        info!("Metrics pusher started (interval 15s, url: {})", push_url);
    }
    let cfg = StaticConfig::from_env();
    let bind = std::env::var("STATIC_BIND").unwrap_or_else(|_| "0.0.0.0:8081".to_string());
    println!("Static file server starting...");
    println!("  Root: {:?}", cfg.root);
    println!("  Bind: {}", bind);
    println!(
        "  CORS: {}",
        if cfg.cors_enabled {
            "enabled"
        } else {
            "disabled"
        }
    );
    // Ensure all directories exist
    for name in ["avatar", "blob", "media", "static"] {
        let dir = cfg.ensure_dir(name);
        println!("  {} dir: {:?}", name, dir);
    }
    let root = cfg.root.clone();
    let cors_enabled = cfg.cors_enabled;
    HttpServer::new(move || {
        let cors = if cors_enabled {
            // WARNING: allow_any_origin is intentional for static asset serving (CDN mode)
            // Ensure no sensitive files are served from this directory
            Cors::default()
                .allow_any_origin()
                .allowed_methods(vec!["GET", "HEAD", "OPTIONS"])
                .allowed_headers(vec![
                    header::AUTHORIZATION,
                    header::ACCEPT,
                    header::CONTENT_TYPE,
                ])
                .max_age(3600)
        } else {
            // CORS disabled: use the restrictive defaults, which allow no
            // origins. This branch previously used `Cors::permissive()`, which
            // made "disabled" more open than "enabled".
            Cors::default()
        };
        App::new()
            .wrap(cors)
            .wrap(RequestLogger)
            .route("/health", web::get().to(health))
            .service(
                Files::new("/avatar", root.join("avatar"))
                    .prefer_utf8(true)
                    .index_file("index.html"),
            )
            .service(
                Files::new("/blob", root.join("blob"))
                    .prefer_utf8(true)
                    .index_file("index.html"),
            )
            .service(
                Files::new("/media", root.join("media"))
                    .prefer_utf8(true)
                    .index_file("index.html"),
            )
            .service(
                Files::new("/static", root.join("static"))
                    .prefer_utf8(true)
                    .index_file("index.html"),
            )
    })
    .bind(&bind)?
    .run()
    .await?;
    Ok(())
 }
--- a/bun.lock
+++ b/bun.lock
--- a/components.json
+++ b/components.json
@ -1,6 +1,6 @@
 {
  "$schema": "https://ui.shadcn.com/schema.json",
-  "style": "radix-nova",
+  "style": "base-nova",
  "rsc": false,
  "tsx": true,
  "tailwind": {
@ -12,6 +12,8 @@
  },
  "iconLibrary": "lucide",
  "rtl": false,
  "menuColor": "default",
  "menuAccent": "subtle",
  "aliases": {
    "components": "@/components",
    "utils": "@/lib/utils",
@ -19,9 +21,7 @@
    "lib": "@/lib",
    "hooks": "@/hooks"
  },
  "menuColor": "default",
  "menuAccent": "subtle",
  "registries": {
-    "@ai-elements": "https://ai-sdk.dev/elements/api/registry/{name}.json"
+    "@manifest": "https://ui.manifest.build/r/{name}.json"
  }
 }
--- a/deploy/.helmignore
+++ b/deploy/.helmignore
@ -1,25 +0,0 @@
 # Patterns to ignore when building packages.
 # This supports shell glob matching, relative path matching, and
 # negation (prefixed with !). Only one pattern per line.
 .DS_Store
 # Common VCS dirs
 .git/
 .gitignore
 .bzr/
 .bzrignore
 .hg/
 .hgignore
 .svn/
 # Common backup files
 *.swp
 *.bak
 *.tmp
 *.orig
 *~
 # Various IDEs
 .project
 .idea/
 *.tmproj
 .vscode/
 # Secrets
 .server.yaml
--- a/deploy/Chart.yaml
+++ b/deploy/Chart.yaml
@ -1,6 +0,0 @@
 apiVersion: v2
 name: deploy
 description: Helm chart for the project backend services
 type: application
 version: 0.1.0
 appVersion: "0.2.9"
--- a/deploy/README.md
+++ b/deploy/README.md
@ -1,209 +0,0 @@
 # Deploy Helm Chart
 Monolithic Helm chart for all backend services.
 ## Services
 | Service              | Port(s)                 | Replicas | HPA      | Purpose                                     |
 |----------------------|-------------------------|----------|----------|---------------------------------------------|
 | `app`                | 3000 (HTTP)             | 2        | 2–10     | Main API server                             |
 | `gitserver`          | 8021 (HTTP), 2222 (SSH) | 1        | 1–5      | Git HTTP + SSH server                       |
 | `email_worker`       | 8084 (HTTP)             | 1        | disabled | Email queue consumer (single instance only) |
 | `git_hook`           | 8083 (HTTP)             | 1        | 1–5      | Git hook worker pool                        |
 | `metrics_aggregator` | 9090 (HTTP)             | 1        | 1–5      | Prometheus scrape + Loki push               |
 | `static_server`      | 8081 (HTTP)             | 1        | 1–5      | Static file server (avatars, blobs, media)  |
 ## Prerequisites
 The following resources must exist in the cluster **before** installing the Helm chart. They are not managed by Helm —
 install, upgrade, and uninstall of the chart will not touch them.
 ### 1. Namespace
 ```bash
 kubectl create namespace app
 ```
 ### 2. PVC (aliyun-nfs-app, 200Ti, ReadWriteMany)
 ```bash
 kubectl apply -f - <<'EOF'
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: shared-data
  namespace: app
 spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 200Ti
  storageClassName: aliyun-nfs-app
 EOF
 ```
 > The chart references this PVC by hardcoded name `shared-data`. This name is immutable — it cannot be changed via Helm
 > values.
 ### 3. ConfigMap
 ```bash
 kubectl apply -f - <<'EOF'
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: app-env
  namespace: app
 data:
  APP_REPOS_ROOT: "/data/repos"
  APP_AVATAR_PATH: "/data/avatars"
  STORAGE_PATH: "/data/files"
  STATIC_ROOT: "/data"
  APP_LOG_LEVEL: "info"
  APP_COOKIE_SECURE: "false"
  APP_DOMAIN_URL: "https://your-domain.com"
  APP_DATABASE_URL: "postgres://user:pass@postgres:5432/app"
  APP_REDIS_URL: "redis://redis:6379"
  APP_AI_BASIC_URL: "https://api.openai.com/v1"
  APP_AI_API_KEY: "sk-..."
  APP_SMTP_PASSWORD: "..."
  APP_SESSION_SECRET: "min-32-byte-random-string..."
  APP_SSH_SERVER_PRIVATE_KEY: "<hex-encoded-private-key>"
 EOF
 ```
 | Variable                     | Default / Example           | Required  |
 |------------------------------|-----------------------------|-----------|
 | `APP_REPOS_ROOT`             | `/data/repos`               | Yes       |
 | `APP_AVATAR_PATH`            | `/data/avatars`             | Yes       |
 | `STORAGE_PATH`               | `/data/files`               | Yes       |
 | `STATIC_ROOT`                | `/data`                     | Yes       |
 | `APP_LOG_LEVEL`              | `info`                      | No        |
 | `APP_COOKIE_SECURE`          | `false`                     | No        |
 | `APP_DOMAIN_URL`             | `https://your-domain.com`   | Yes       |
 | `APP_DATABASE_URL`           | `postgres://...`            | **Yes**   |
 | `APP_REDIS_URL`              | `redis://...`               | **Yes**   |
 | `APP_AI_BASIC_URL`           | `https://api.openai.com/v1` | **Yes**   |
 | `APP_AI_API_KEY`             | `sk-...`                    | **Yes**   |
 | `APP_SMTP_PASSWORD`          | `...`                       | **Yes**   |
 | `APP_SESSION_SECRET`         | min 32 bytes                | **Yes**   |
 | `APP_SSH_SERVER_PRIVATE_KEY` | hex-encoded PEM             | **Yes**   |
 | `APP_SSH_PORT`               | `2222`                      | Yes (k8s) |
 > **SSH host key**: `APP_SSH_SERVER_PRIVATE_KEY` must be the hex-encoded Ed25519 private key PEM bytes.
 > ```bash
 > ssh-keygen -t ed25519 -f /tmp/ssh_host_key -N ""
 > hexdump -v -e '/1 "%02x"' < /tmp/ssh_host_key
 > ```
 >
 > **Session secret**: generate 48 random bytes:
 > ```bash
 > openssl rand -base64 48
 > ```
 >
 > Override the ConfigMap name with `--set configMapName=your-cm-name`.
 ### 4. Verify prerequisites
 ```bash
 kubectl get namespace app
 kubectl get pvc -n app shared-data
 kubectl get configmap -n app app-env
 ```
 ## Quick Start
 ```bash
 helm template deploy ./deploy --namespace app --set imageRegistry=ghcr.io/your-org
 helm lint ./deploy
 # Install
 helm upgrade --install deploy ./deploy \
  --namespace app \
  --set imageRegistry=ghcr.io/your-org \
  --set imageTag=v0.2.9
 ```
 ## Storage
 All services share a single PVC (`shared-data`) via `subPath` mounts:
 | SubPath   | Mount           | Used By                  |
 |-----------|-----------------|--------------------------|
 | `repos`   | `/data/repos`   | app, gitserver, git-hook |
 | `avatars` | `/data/avatars` | app                      |
 | `files`   | `/data/files`   | app                      |
 | `static`  | `/data`         | static-server            |
 Pods run as UID/GID `1000` and set `fsGroup: 1000` so Git processes can create temporary object
 directories under bare repositories. If an existing PVC was previously written by another UID,
 fix ownership once from a maintenance pod:
 ```bash
 chown -R 1000:1000 /data/repos
 chmod -R u+rwX,g+rwX /data/repos
 ```
 ## Autoscaling
 All services except `email_worker` have HPA enabled by default. The email worker is fixed at 1 replica and must not be
 scaled.
 To adjust HPA bounds per service:
 ```bash
 --set services.app.autoscaling.maxReplicas=20
 --set services.app.autoscaling.targetCPUUtilization=70
 ```
 To disable HPA for a service:
 ```bash
 --set services.git_hook.autoscaling.enabled=false
 ```
 ## Ingress
 ```bash
 helm upgrade --install deploy ./deploy \
  --namespace app \
  --set ingress.enabled=true \
  --set ingress.className=nginx \
  --set ingress.hosts[0].host=your-domain.com
 ```
 ## Dependencies
 All services require these to be reachable from the cluster:
 - PostgreSQL (via `APP_DATABASE_URL`)
 - Redis (via `APP_REDIS_URL`)
 - Git binary (included in all Docker images)
 - OpenAI-compatible API (via `APP_AI_BASIC_URL` + `APP_AI_API_KEY`)
 - Qdrant vector DB (via `APP_QDRANT_URL`)
 - SMTP server (via `APP_SMTP_*`)
 - Embedding model (via `APP_EMBED_MODEL_*`)
 Optional dependencies with graceful degradation:
 | Dependency     | Variable                      | Fallback         |
 |----------------|-------------------------------|------------------|
 | NATS JetStream | `NATS_URL` + `NATS_TOKEN`     | Redis queue      |
 | Loki           | `LOKI_URL`                    | Logs discarded   |
 | OTEL Collector | `OTEL_EXPORTER_OTLP_ENDPOINT` | Tracing disabled |
 ## Production Example
 ```bash
 helm upgrade --install deploy ./deploy \
  --namespace app \
  --set imageRegistry=ghcr.io/your-org \
  --set imageTag=v0.2.9 \
  --set services.app.replicaCount=3 \
  --set services.app.autoscaling.maxReplicas=20 \
  --set ingress.enabled=true \
  --set ingress.className=nginx \
  --set ingress.hosts[0].host=your-domain.com \
  --set configMapName=app-env
 ```
--- a/deploy/templates/NOTES.txt
+++ b/deploy/templates/NOTES.txt
@ -1,19 +0,0 @@
 Project backend services deployed to namespace: {{ .Release.Namespace }}
 Services:
 {{- range $svcKey, $svcVal := .Values.services }}
  {{ $svcKey | replace "_" "-" }}: {{ if $svcVal.ports }}{{ range $portName, $portNum := $svcVal.ports }}{{ $portName }}={{ $portNum }} {{ end }}{{ else }}port={{ $svcVal.port }}{{ end }} {{ if $svcVal.autoscaling.enabled }}(HPA: {{ $svcVal.autoscaling.minReplicas }}-{{ $svcVal.autoscaling.maxReplicas }}){{ else }}(static: {{ $svcVal.replicaCount }}){{ end }}
 {{- end }}
 To access the app locally:
  kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "deploy.serviceFullname" (dict "root" . "svcKey" "app") }} 3000:3000
 To check HPA status:
 {{- range $svcKey, $svcVal := .Values.services }}
 {{- if $svcVal.autoscaling.enabled }}
  kubectl get hpa -n {{ $.Release.Namespace }} {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" $svcKey) }}
 {{- end }}
 {{- end }}
 To check all pods:
  kubectl get pods -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "deploy.name" . }}"
--- a/deploy/templates/_helpers.tpl
+++ b/deploy/templates/_helpers.tpl
@ -1,78 +0,0 @@
 {{/*
 Expand the name of the chart.
 */}}
 {{- define "deploy.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Create a default fully qualified app name.
 */}}
 {{- define "deploy.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- $name := default .Chart.Name .Values.nameOverride }}
 {{- if contains $name .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{- end }}
 {{/*
 Service fullname — includes service key for per-service resources.
 Underscores in svcKey are replaced with hyphens for valid Kubernetes names.
 */}}
 {{- define "deploy.serviceFullname" -}}
 {{- printf "%s-%s" (include "deploy.fullname" .root) (.svcKey | replace "_" "-") | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Chart name and version as used by the chart label.
 */}}
 {{- define "deploy.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Common labels
 */}}
 {{- define "deploy.labels" -}}
 helm.sh/chart: {{ include "deploy.chart" . }}
 {{ include "deploy.selectorLabels" . }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- end }}
 {{/*
 Selector labels
 */}}
 {{- define "deploy.selectorLabels" -}}
 app.kubernetes.io/name: {{ include "deploy.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 {{/*
 Per-service selector labels — used by Service to target the right Deployment.
 Underscores in svcKey are replaced with hyphens for valid Kubernetes label values.
 */}}
 {{- define "deploy.serviceSelectorLabels" -}}
 app.kubernetes.io/name: {{ include "deploy.name" .root }}
 app.kubernetes.io/instance: {{ .root.Release.Name }}
 app.kubernetes.io/component: {{ .svcKey | replace "_" "-" }}
 {{- end }}
 {{/*
 Create the name of the service account to use
 */}}
 {{- define "deploy.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
 {{- default (include "deploy.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}
 {{- end }}
--- a/deploy/templates/app/deployment.yaml
+++ b/deploy/templates/app/deployment.yaml
@ -1,89 +0,0 @@
 # Deployment for the "app" service.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "app") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: app
 spec:
  # replicas is only rendered when autoscaling is off; with the HPA enabled a
  # fixed value here would reset the scaled replica count on every upgrade.
  {{- if not (.Values.services.app.autoscaling | default dict).enabled }}
  replicas: {{ .Values.services.app.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "app") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: app
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: app
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The registry prefix is skipped when imageRegistry is empty so the
          # image reference never starts with "/".
          image: "{{ with .Values.imageRegistry }}{{ . }}/{{ end }}{{ .Values.services.app.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          {{- with .Values.services.app.command }}
          command:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.services.app.port }}
              protocol: TCP
          # Shared environment comes from the ConfigMap named in configMapName.
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          {{- with .Values.services.app.extraEnv }}
          env:
            {{- range $key, $val := . }}
            - name: {{ $key }}
              value: {{ $val | quote }}
            {{- end }}
          {{- end }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.app.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.app.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      # The "shared-data" claim is referenced by name and is not created here.
      volumes:
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
--- a/deploy/templates/app/service.yaml
+++ b/deploy/templates/app/service.yaml
@ -1,16 +0,0 @@
 # ClusterIP Service in front of the "app" Deployment.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "app") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: app
 spec:
  type: ClusterIP
  # Selects pods by name, instance and component labels.
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "app") | nindent 4 }}
  ports:
    # targetPort refers to the container port named "http".
    - name: http
      protocol: TCP
      port: {{ .Values.services.app.port }}
      targetPort: http
--- a/deploy/templates/email_worker/deployment.yaml
+++ b/deploy/templates/email_worker/deployment.yaml
@ -1,70 +0,0 @@
 # Deployment for the "email_worker" service.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "email_worker") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: email-worker
 spec:
  # replicas is only rendered when autoscaling is off; with the HPA enabled a
  # fixed value here would reset the scaled replica count on every upgrade.
  {{- if not (.Values.services.email_worker.autoscaling | default dict).enabled }}
  replicas: {{ .Values.services.email_worker.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "email_worker") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: email-worker
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: email-worker
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The registry prefix is skipped when imageRegistry is empty so the
          # image reference never starts with "/".
          image: "{{ with .Values.imageRegistry }}{{ . }}/{{ end }}{{ .Values.services.email_worker.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.email_worker.port }}
              protocol: TCP
          # Shared environment comes from the ConfigMap named in configMapName.
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.email_worker.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
--- a/deploy/templates/email_worker/service.yaml
+++ b/deploy/templates/email_worker/service.yaml
@ -1,16 +0,0 @@
 # ClusterIP Service in front of the "email_worker" Deployment.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "email_worker") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: email-worker
 spec:
  type: ClusterIP
  # Selects pods by name, instance and component labels.
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "email_worker") | nindent 4 }}
  ports:
    # targetPort refers to the container port named "http".
    - name: http
      protocol: TCP
      port: {{ .Values.services.email_worker.port }}
      targetPort: http
--- a/deploy/templates/git_hook/deployment.yaml
+++ b/deploy/templates/git_hook/deployment.yaml
@ -1,78 +0,0 @@
 # Deployment for the "git_hook" service.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "git_hook") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: git-hook
 spec:
  # replicas is only rendered when autoscaling is off; with the HPA enabled a
  # fixed value here would reset the scaled replica count on every upgrade.
  {{- if not (.Values.services.git_hook.autoscaling | default dict).enabled }}
  replicas: {{ .Values.services.git_hook.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "git_hook") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: git-hook
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: git-hook
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The registry prefix is skipped when imageRegistry is empty so the
          # image reference never starts with "/".
          image: "{{ with .Values.imageRegistry }}{{ . }}/{{ end }}{{ .Values.services.git_hook.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.git_hook.port }}
              protocol: TCP
          # Shared environment comes from the ConfigMap named in configMapName.
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.git_hook.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.git_hook.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      # The "shared-data" claim is referenced by name and is not created here.
      volumes:
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
--- a/deploy/templates/git_hook/service.yaml
+++ b/deploy/templates/git_hook/service.yaml
@ -1,16 +0,0 @@
 # ClusterIP Service in front of the "git_hook" Deployment.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "git_hook") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: git-hook
 spec:
  type: ClusterIP
  # Selects pods by name, instance and component labels.
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "git_hook") | nindent 4 }}
  ports:
    # targetPort refers to the container port named "http".
    - name: http
      protocol: TCP
      port: {{ .Values.services.git_hook.port }}
      targetPort: http
--- a/deploy/templates/gitserver/deployment.yaml
+++ b/deploy/templates/gitserver/deployment.yaml
@ -1,88 +0,0 @@
 # Deployment for the "gitserver" service (HTTP and SSH ports).
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "gitserver") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: gitserver
 spec:
  # replicas is only rendered when autoscaling is off; with the HPA enabled a
  # fixed value here would reset the scaled replica count on every upgrade.
  {{- if not (.Values.services.gitserver.autoscaling | default dict).enabled }}
  replicas: {{ .Values.services.gitserver.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "gitserver") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: gitserver
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: gitserver
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The registry prefix is skipped when imageRegistry is empty so the
          # image reference never starts with "/".
          image: "{{ with .Values.imageRegistry }}{{ . }}/{{ end }}{{ .Values.services.gitserver.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.gitserver.ports.http }}
              protocol: TCP
            - name: ssh
              containerPort: {{ .Values.services.gitserver.ports.ssh }}
              protocol: TCP
          # Shared environment comes from the ConfigMap named in configMapName.
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          {{- with .Values.services.gitserver.extraEnv }}
          env:
            {{- range $key, $val := . }}
            - name: {{ $key }}
              value: {{ $val | quote }}
            {{- end }}
          {{- end }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.gitserver.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.gitserver.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      # The "shared-data" claim is referenced by name and is not created here.
      volumes:
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
--- a/deploy/templates/gitserver/service.yaml
+++ b/deploy/templates/gitserver/service.yaml
@ -1,20 +0,0 @@
 # ClusterIP Service exposing the gitserver HTTP and SSH ports inside the cluster.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "gitserver") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: gitserver
 spec:
  type: ClusterIP
  # Selects pods by name, instance and component labels.
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "gitserver") | nindent 4 }}
  ports:
    # targetPort values refer to the container ports named "http" and "ssh".
    - name: http
      protocol: TCP
      port: {{ .Values.services.gitserver.ports.http }}
      targetPort: http
    - name: ssh
      protocol: TCP
      port: {{ .Values.services.gitserver.ports.ssh }}
      targetPort: ssh
--- a/deploy/templates/gitserver/ssh-service.yaml
+++ b/deploy/templates/gitserver/ssh-service.yaml
@ -1,21 +0,0 @@
 # LoadBalancer Service publishing gitserver SSH on port 22.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "gitserver") }}-ssh
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: gitserver
  # The annotations key is only rendered when annotations are configured, so an
  # empty map does not produce a dangling "annotations:" entry.
  {{- with .Values.services.gitserver.sshService.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 spec:
  type: LoadBalancer
  externalTrafficPolicy: Local
  ports:
    # External port 22 is forwarded to the container port named "ssh".
    - port: 22
      targetPort: ssh
      protocol: TCP
      name: ssh
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "gitserver") | nindent 4 }}
--- a/deploy/templates/hpa.yaml
+++ b/deploy/templates/hpa.yaml
@ -1,26 +0,0 @@
 {{- /*
 One CPU-based HorizontalPodAutoscaler per entry in .Values.services whose
 autoscaling.enabled is true. The HPA targets the Deployment rendered with the
 same "deploy.serviceFullname".
 */}}
 {{- range $svcKey, $svcVal := .Values.services }}
 {{- /* Fall back to an empty dict so a service without an autoscaling block is skipped instead of failing the render. */}}
 {{- $hpa := $svcVal.autoscaling | default dict }}
 {{- if $hpa.enabled }}
 ---
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" $svcKey) }}
  labels:
    {{- include "deploy.labels" $ | nindent 4 }}
    app.kubernetes.io/component: {{ $svcKey | replace "_" "-" }}
 spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" $svcKey) }}
  minReplicas: {{ $hpa.minReplicas }}
  maxReplicas: {{ $hpa.maxReplicas }}
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ $hpa.targetCPUUtilization }}
 {{- end }}
 {{- end }}
--- a/deploy/templates/ingress.yaml
+++ b/deploy/templates/ingress.yaml
@ -1,41 +0,0 @@
 {{- /*
 Single Ingress for the release, rendered only when .Values.ingress.enabled is set.
 Each path entry supplies a serviceName (a service key) and a servicePort; the
 backend Service name is resolved through "deploy.serviceFullname".
 */ -}}
 {{- if .Values.ingress.enabled -}}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: {{ include "deploy.fullname" . }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            pathType: {{ .pathType }}
            backend:
              service:
                {{- /* "$" is the top-level context; "." is the current path entry inside the range. */}}
                name: {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" .serviceName) }}
                port:
                  number: {{ .servicePort }}
          {{- end }}
    {{- end }}
 {{- end }}
--- a/deploy/templates/metrics_aggregator/deployment.yaml
+++ b/deploy/templates/metrics_aggregator/deployment.yaml
@ -1,70 +0,0 @@
 # Deployment for the "metrics_aggregator" service.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "metrics_aggregator") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: metrics-aggregator
 spec:
  # replicas is only rendered when autoscaling is off; with the HPA enabled a
  # fixed value here would reset the scaled replica count on every upgrade.
  {{- if not (.Values.services.metrics_aggregator.autoscaling | default dict).enabled }}
  replicas: {{ .Values.services.metrics_aggregator.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "metrics_aggregator") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: metrics-aggregator
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: metrics-aggregator
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The registry prefix is skipped when imageRegistry is empty so the
          # image reference never starts with "/".
          image: "{{ with .Values.imageRegistry }}{{ . }}/{{ end }}{{ .Values.services.metrics_aggregator.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.metrics_aggregator.port }}
              protocol: TCP
          # Shared environment comes from the ConfigMap named in configMapName.
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.metrics_aggregator.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
--- a/deploy/templates/metrics_aggregator/service.yaml
+++ b/deploy/templates/metrics_aggregator/service.yaml
@ -1,16 +0,0 @@
 # ClusterIP Service in front of the "metrics_aggregator" Deployment.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "metrics_aggregator") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: metrics-aggregator
 spec:
  type: ClusterIP
  # Selects pods by name, instance and component labels.
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "metrics_aggregator") | nindent 4 }}
  ports:
    # targetPort refers to the container port named "http".
    - name: http
      protocol: TCP
      port: {{ .Values.services.metrics_aggregator.port }}
      targetPort: http
--- a/deploy/templates/secret.yaml
+++ b/deploy/templates/secret.yaml
@ -1 +0,0 @@
 {{/* Secret disabled — all config via ConfigMap */}}
--- a/deploy/templates/serviceaccount.yaml
+++ b/deploy/templates/serviceaccount.yaml
@ -1,13 +0,0 @@
 {{- /*
 ServiceAccount for the release, rendered only when .Values.serviceAccount.create
 is set. The name comes from "deploy.serviceAccountName"; annotations are added
 only when configured.
 */ -}}
 {{- if .Values.serviceAccount.create -}}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: {{ include "deploy.serviceAccountName" . }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
 {{- end }}
--- a/deploy/templates/static_server/deployment.yaml
+++ b/deploy/templates/static_server/deployment.yaml
@ -1,78 +0,0 @@
 # Deployment for the "static_server" service.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "static_server") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: static-server
 spec:
  # replicas is only rendered when autoscaling is off; with the HPA enabled a
  # fixed value here would reset the scaled replica count on every upgrade.
  {{- if not (.Values.services.static_server.autoscaling | default dict).enabled }}
  replicas: {{ .Values.services.static_server.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "static_server") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: static-server
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: static-server
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The registry prefix is skipped when imageRegistry is empty so the
          # image reference never starts with "/".
          image: "{{ with .Values.imageRegistry }}{{ . }}/{{ end }}{{ .Values.services.static_server.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.static_server.port }}
              protocol: TCP
          # Shared environment comes from the ConfigMap named in configMapName.
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.static_server.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.static_server.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      # The "shared-data" claim is referenced by name and is not created here.
      volumes:
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
--- a/deploy/templates/static_server/service.yaml
+++ b/deploy/templates/static_server/service.yaml
@ -1,16 +0,0 @@
 # ClusterIP Service in front of the "static_server" Deployment.
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "static_server") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: static-server
 spec:
  type: ClusterIP
  # Selects pods by name, instance and component labels.
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "static_server") | nindent 4 }}
  ports:
    # targetPort refers to the container port named "http".
    - name: http
      protocol: TCP
      port: {{ .Values.services.static_server.port }}
      targetPort: http
--- a/deploy/values.yaml
+++ b/deploy/values.yaml
@ -1,212 +0,0 @@
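 # Global image registry and tag.
 # Each service image is rendered as "<imageRegistry>/<repository>:<imageTag>";
 # an empty imageTag falls back to the chart appVersion.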
 imageRegistry: ""
 imageTag: ""
 # External ConfigMap (managed outside Helm)
 configMapName: "app-env"
 # Service definitions
 services:
  app:
    repository: app
    port: 3000
    replicaCount: 2
    autoscaling:
      enabled: true
      minReplicas: 2
      maxReplicas: 10
      targetCPUUtilization: 80
    command:
      - "app"
      - "--bind"
      - "0.0.0.0:3000"
    resources:
      requests:
        cpu: 200m
        memory: 256Mi
      limits:
        cpu: "1"
        memory: 512Mi
    volumeMounts:
      - name: shared-data
        mountPath: /data/repos
        subPath: repos
      - name: shared-data
        mountPath: /data/avatars
        subPath: avatars
      - name: shared-data
        mountPath: /data/files
        subPath: files
  email_worker:
    repository: email-worker
    port: 8084
    replicaCount: 1
    autoscaling:
      enabled: false  # email must stay at 1 replica
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 256Mi
  git_hook:
    repository: git-hook
    port: 8083
    replicaCount: 1
    autoscaling:
      enabled: true
      minReplicas: 1
      maxReplicas: 5
      targetCPUUtilization: 80
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 256Mi
    volumeMounts:
      - name: shared-data
        mountPath: /data/repos
        subPath: repos
  gitserver:
    repository: gitserver
    ports:
      http: 8021
      ssh: 2222
    replicaCount: 1
    autoscaling:
      enabled: true
      minReplicas: 1
      maxReplicas: 5
      targetCPUUtilization: 80
    # APP_SSH_PORT must match ports.ssh above, which is used as the SSH containerPort
    extraEnv:
      APP_SSH_PORT: "2222"
    # SSH service config (MetalLB + Cilium)
    # Shared IP: nginx ingress (80/443) + SSH (22) on same VIP
    # Requires ingress-nginx svc also annotated with allow-shared-ip: "gitdata-shared"
    sshService:
      annotations: {}
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 256Mi
    volumeMounts:
      - name: shared-data
        mountPath: /data/repos
        subPath: repos
  metrics_aggregator:
    repository: metrics-aggregator
    port: 9090
    replicaCount: 1
    autoscaling:
      enabled: true
      minReplicas: 1
      maxReplicas: 5
      targetCPUUtilization: 80
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 256Mi
  static_server:
    repository: static-server
    port: 8081
    replicaCount: 1
    autoscaling:
      enabled: true
      minReplicas: 1
      maxReplicas: 5
      targetCPUUtilization: 80
    resources:
      requests:
        cpu: 50m
        memory: 64Mi
      limits:
        cpu: 200m
        memory: 128Mi
    volumeMounts:
      - name: shared-data
        mountPath: /data
        subPath: static
 # Ingress
 ingress:
  enabled: true
  className: "nginx"
  annotations:
    cert-manager.io/cluster-issuer: "cloudflare-acme-cluster-issuer"
    nginx.ingress.kubernetes.io/proxy-body-size: "0"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
    nginx.ingress.kubernetes.io/affinity: "cookie"
    nginx.ingress.kubernetes.io/session-cookie-name: "INGRESSROUTE"
    nginx.ingress.kubernetes.io/session-cookie-path: "/"
    nginx.ingress.kubernetes.io/session-cookie-max-age: "86400"
    nginx.ingress.kubernetes.io/enable-real-ip: "true"
    nginx.ingress.kubernetes.io/real-ip-header: "X-Forwarded-For"
    nginx.ingress.kubernetes.io/use-forwarded-headers: "true"
  hosts:
    - host: gitdata.ai
      paths:
        - path: /
          pathType: Prefix
          serviceName: app
          servicePort: 3000
    - host: static.gitdata.ai
      paths:
        - path: /
          pathType: Prefix
          serviceName: static_server
          servicePort: 8081
    - host: git.gitdata.ai
      paths:
        - path: /
          pathType: Prefix
          serviceName: gitserver
          servicePort: 8021
  tls:
    - secretName: gitdata-ai-tls
      hosts:
        - gitdata.ai
        - static.gitdata.ai
        - git.gitdata.ai
 imagePullSecrets: []
 nameOverride: ""
 fullnameOverride: ""
 serviceAccount:
  create: true
  automount: true
  annotations: {}
  name: ""
 podSecurityContext:
  runAsNonRoot: true
  runAsUser: 1000
  runAsGroup: 1000
  fsGroup: 1000
  fsGroupChangePolicy: OnRootMismatch
 securityContext:
  capabilities:
    drop:
      - ALL
  readOnlyRootFilesystem: false
 nodeSelector: {}
 tolerations: []
 affinity: {}
--- a/docker/app.Dockerfile
+++ b/docker/app.Dockerfile
@ -1,9 +0,0 @@
 # Runtime image for the "app" binary; the binary is built outside this
 # Dockerfile and copied in from ./target/release.
 FROM ubuntu:24.04
 # Runtime packages only; the apt lists are removed in the same layer.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates libssl3 openssh-client procps git \
    && rm -rf /var/lib/apt/lists/*
 # Mark every directory as safe for git, system-wide.
 RUN git config --system --add safe.directory '*'
 WORKDIR /app
 COPY ./target/release/app /bin
 EXPOSE 3000
 CMD ["app"]
--- a/docker/email.Dockerfile
+++ b/docker/email.Dockerfile
@ -1,8 +1,61 @@
-FROM ubuntu:24.04
+# GitDataAI Backend - Email Service
-RUN apt-get update && apt-get install -y --no-install-recommends \
+# Multi-stage build for Rust application
-    ca-certificates libssl3 \
+
 # Stage 1: Build the application
 FROM rust:1.96-bookworm AS builder
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
    pkg-config \
    libssl-dev \
    libpq-dev \
    cmake \
    && rm -rf /var/lib/apt/lists/*
 # Create app directory
 WORKDIR /app
-COPY ./target/release/email-worker /bin
+
-EXPOSE 8084
+# Copy workspace files
-CMD ["email-worker"]
+COPY Cargo.toml Cargo.lock ./
 COPY app/ app/
 COPY lib/ lib/
 # Build the application in release mode
 RUN cargo build --release --bin email-service
 # Stage 2: Create runtime image
 FROM debian:bookworm-slim
 # Install runtime dependencies.
 # procps provides pgrep, which the HEALTHCHECK below depends on; without it
 # the health check command cannot run and the container is reported unhealthy.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    libssl3 \
    libpq5 \
    ca-certificates \
    curl \
    procps \
    && rm -rf /var/lib/apt/lists/*
 # Create non-root user
 RUN useradd -r -s /bin/false appuser
 # Create directories
 RUN mkdir -p /app/logs \
    && chown -R appuser:appuser /app
 # Copy binary from builder, already owned by the runtime user
 # (avoids a second chown layer that duplicates the binary).
 COPY --from=builder --chown=appuser:appuser /app/target/release/email-service /app/email-service
 # Switch to non-root user
 USER appuser
 # Set working directory
 WORKDIR /app
 # Health check: healthy while an email-service process exists
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD pgrep email-service || exit 1
 # Run the application
 CMD ["./email-service"]
--- a/docker/githook.Dockerfile
+++ b/docker/githook.Dockerfile
@ -1,9 +0,0 @@
 # Runtime image for the "git-hook" binary; the binary is built outside this
 # Dockerfile and copied in from ./target/release.
 FROM ubuntu:24.04
 # Runtime packages only; the apt lists are removed in the same layer.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates libssl3 git \
    && rm -rf /var/lib/apt/lists/*
 # Mark every directory as safe for git, system-wide.
 RUN git config --system --add safe.directory '*'
 WORKDIR /app
 COPY ./target/release/git-hook /bin
 EXPOSE 8083
 CMD ["git-hook"]
--- a/docker/gitserver.Dockerfile
+++ b/docker/gitserver.Dockerfile
@ -1,9 +0,0 @@
 # Runtime image for the "gitserver" binary; the binary is built outside this
 # Dockerfile and copied in from ./target/release.
 FROM ubuntu:24.04
 # Runtime packages only; the apt lists are removed in the same layer.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates libssl3 git openssh-client \
    && rm -rf /var/lib/apt/lists/*
 # Mark every directory as safe for git, system-wide.
 RUN git config --system --add safe.directory '*'
 WORKDIR /app
 COPY ./target/release/gitserver /bin
 # Two ports are exposed: 8021 and 2222.
 EXPOSE 8021 2222
 CMD ["gitserver"]
--- a/docker/metrics.Dockerfile
+++ b/docker/metrics.Dockerfile
@ -1,8 +0,0 @@
 # Runtime image for the "metrics-aggregator" binary; the binary is built
 # outside this Dockerfile and copied in from ./target/release.
 FROM ubuntu:24.04
 # Runtime packages only; the apt lists are removed in the same layer.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates libssl3 \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY ./target/release/metrics-aggregator /bin
 EXPOSE 9090
 CMD ["metrics-aggregator"]
--- a/docker/static.Dockerfile
+++ b/docker/static.Dockerfile
@ -1,8 +0,0 @@
 # Runtime image for the "static-server" binary; the binary is built outside
 # this Dockerfile and copied in from ./target/release.
 FROM ubuntu:24.04
 # Runtime packages only; the apt lists are removed in the same layer.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates libssl3 \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY ./target/release/static-server /bin
 EXPOSE 8081
 CMD ["static-server"]
--- a/eslint.config.js
+++ b/eslint.config.js
@ -16,15 +16,7 @@ export default defineConfig([
      reactRefresh.configs.vite,
    ],
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
    },
    rules: {
      'react-refresh/only-export-components': [
        'warn',
        { allowExportNames: ['useThemeCustomization', 'useThemePreset', 'useRoom', 'useOptionalRoom', 'resetAllThemeVars', 'loadThemeVars', 'applyThemePreset'] },
      ],
      'react-hooks/exhaustive-deps': 'warn',
    },
  },
 ])
--- a/gene.md
+++ b/gene.md
@ -1,901 +0,0 @@
 # Gene 方案
 ## 这份文档针对什么
 这个项目里，`Skill` 已经不是一个抽象概念，而是完整的业务实体：
 * 后端有 `project_skill` 持久化模型
 * Git 同步会扫描仓库里的 `SKILL.md`
 * 聊天构建会把启用的技能注入上下文
 * 前端有技能列表、详情、编辑、删除、扫描
 * 内建技能模板也已经存在
 所以这里不应该“用 Gene 替换 Skill”，而应该是：
 * `Skill` 继续负责执行和交付能力
 * `Gene` 作为新增元层，负责让技能可演化、可比较、可追踪、可继承
 ---
 ## 设计结论
 `Gene` 的正确位置不是新的执行系统，也不是 `Skill` 的替代品，而是 `Skill` 的生命周期治理层。
 ```text
 Skill = 可执行内容 + 上下文注入 + 业务交付
 Gene = 演化族 + 版本谱系 + 评估记录 + 选择记录
 ```
 更准确地说：
 ```text
 Skill 是可执行的技能内容，负责被扫描、编辑、启用、注入和交付能力。
 Gene 是 Skill 的演化族，负责组织一个 Skill 在项目内的版本、来源、谱系、评估和选择记录。
 GeneRevision 是 Gene 下的不可变版本节点，绑定到某个 Skill 内容快照。
 GeneEvaluation 是对某个 GeneRevision 的评估结果。
 GeneSelection 是对当前有效 GeneRevision 的显式选择记录。
 ```
 ---
 ## 项目现状
 ### Skill 已经落在这些地方
 * `libs/models/projects/project_skill.rs`
    * 定义了 `project_skill` 实体
    * 已包含 `source`
    * 已包含 `repo_id`
    * 已包含 `commit_sha`
    * 已包含 `blob_hash`
    * 已包含 `content`
    * 已包含 `metadata`
    * 已包含 `enabled`
 * `libs/git/hook/sync/mod.rs`
    * 扫描仓库中的 `SKILL.md`
    * 从 frontmatter 解析 `name`、`description`、`license`、`compatibility`
    * 用 `commit_sha` 和 `blob_hash` 做增量同步
 * `libs/agent/skills/templates.rs`
    * 内建技能模板已经编译进程序
    * 这些模板本质上是“系统内置 Skill”
 * `libs/agent/chat/message_builder.rs`
    * 读取项目中启用的技能
    * 把技能注入对话上下文
    * 和 embedding / perception 一起影响模型行为
 * `src/app/project/skills/*`
    * 已有技能管理 UI
    * 能查看、编辑、删除、扫描技能
 ---
 ## 现有 Skill 的问题
 当前 `Skill` 已经能用，但还不够“可进化”：
 * 只有内容，没有明确的演化关系
 * 只有当前状态，没有谱系
 * 只有启用/禁用，没有版本选择
 * 只有来源信息，没有变体比较
 * 只有同步时间和 blob hash，没有“为什么变成这样”的记录
 * 有评估空间，但没有评估结果和版本绑定
 * 有扫描和编辑能力，但没有可审计的回滚与选择记录
 换句话说，现在的 `Skill` 更像“静态资产”，还不是“进化单元”。
 ---
 ## 设计原则
 ### 1. Skill 仍然是唯一执行实体
 `Skill` 继续负责：
 * 被扫描
 * 被编辑
 * 被启用或禁用
 * 被注入聊天上下文
 * 交付实际能力
 `Gene` 不直接执行任务，不直接调用工具，不直接参与上下文拼装。
 ---
 ### 2. Gene 只管理 Skill 的演化元数据
 `Gene` 管理的是：
 * 版本
 * 来源
 * 父子关系
 * 变体
 * 评估
 * 选择
 * 淘汰
 * 回滚
 它不应该成为第二套 `Skill` 系统。
 ---
 ### 3. GeneRevision 必须不可变
 `Skill` 可以是当前可编辑对象。
 `GeneRevision` 是不可变演化节点。一旦创建，不应再修改：
 * 内容快照
 * 父代关系
 * 来源信息
 * `commit_sha`
 * `blob_hash`
 * `content_hash`
 * `mutation_reason`
 * `mutation_diff`
 后续任何内容变化、prompt 变化、工具权限变化、上下文注入规则变化，都应该产生新的 `GeneRevision`。
 ---
 ### 4. Evaluation 必须绑定到具体 Revision
 评估不是评价一个抽象的 `Gene`，而是评价某个具体版本。
 因此：
 ```text
 GeneEvaluation 必须绑定 revision_id。
 ```
 否则同一个 `Gene` 下存在多个版本时，评估结果无法精确归因。
 ---
 ### 5. Selection 必须显式记录
 如果系统选择了某个版本作为当前有效版本，必须记录：
 * 选中了哪个 revision
 * 为什么选它
 * 谁选的
 * 依据什么策略选的
 * 什么时候选的
 * 是否仍然 active
 这能支持审计、回滚和后续自动选择。
 ---
 ### 6. SKILL.md 仍然是仓库内 Skill 的事实来源
 仓库里的 `SKILL.md` 仍然是 Git 同步的事实来源。
 `Gene` 只记录它如何演化，不改变仓库同步的基本语义。
 ---
 ## 核心概念
 ### Skill
 `Skill` 是现有业务实体。
 它负责：
 ```text
 可执行内容
 上下文注入
 用户可见编辑
 Git 扫描
 启用 / 禁用
 业务交付
 ```
 ### Gene
 `Gene` 是某个 Skill 的演化族。
 它负责组织这个 Skill 的生命周期：
 ```text
 这个能力从哪里来
 经历过哪些版本
 有哪些变体
 评估结果如何
 当前选择哪个版本
 哪些版本被淘汰
 ```
 ### GeneRevision
 `GeneRevision` 是 `Gene` 下的一个不可变版本节点。
 它绑定某个 `Skill` 的内容状态，例如：
 ```text
 skill_id
 skill_slug
 commit_sha
 blob_hash
 content_hash
 content_snapshot_ref
 ```
 ### GeneEvaluation
 `GeneEvaluation` 是对某个 `GeneRevision` 的评估结果。
 它回答：
 ```text
 这个版本好不好？
 在哪个数据集上测的？
 指标是什么？
 是否通过？
 成本和延迟如何？
 失败样本是什么？
 ```
 ### GeneSelection
 `GeneSelection` 是对当前有效版本的显式选择记录。
 它回答：
 ```text
 当前选中哪个 revision？
 为什么选它？
 谁选的？
 依据什么策略？
 是否还在生效？
 ```
 ---
 ## Gene 和 Skill 的关系
 可以把关系理解为：
 ```text
 Gene 1 ── has many ── GeneRevision
 GeneRevision ── references ── Skill snapshot
 GeneRevision ── has many ── GeneEvaluation
 Gene ── has one active ── GeneSelection
 GeneSelection ── selects ── GeneRevision
 Skill ── executes ── actual capability
 ```
 也就是说：
 * `Skill` 解决“能不能做”
 * `Gene` 解决“该保留哪个版本、为什么保留、怎么变体、怎么传播”
 * `GeneRevision` 解决“这个能力在某一刻具体长什么样”
 * `GeneEvaluation` 解决“这个版本是否足够好”
 * `GeneSelection` 解决“当前应该用哪个版本”
 ---
 ## 数据模型
 ### ProjectGene
 ```text
 ProjectGene
  - gene_id
  - project_uuid
  - skill_id
  - skill_slug
  - name
  - description
  - owner
  - status
  - created_at
  - updated_at
 ```
 说明：
 * `gene_id` 是 Gene 的唯一标识
 * `project_uuid` 绑定项目
 * `skill_id` / `skill_slug` 绑定现有 Skill
 * `status` 可为 `active`、`archived`、`deprecated`
 * `owner` 用于责任归属
 ---
 ### ProjectGeneRevision
 ```text
 ProjectGeneRevision
  - revision_id
  - gene_id
  - version
  - parent_revision_id
  - origin
  - source
  - skill_id
  - skill_slug
  - commit_sha
  - blob_hash
  - content_hash
  - content_snapshot_ref
  - mutation_reason
  - mutation_diff
  - created_by
  - created_at
 ```
 说明：
 * `revision_id` 是内部唯一版本节点
 * `version` 是用户可见版本号，不承担唯一性职责
 * `parent_revision_id` 表示单父版本关系
 * `origin` 表示来源，例如 `git_sync`、`manual_edit`、`builtin_template`、`migration`
 * `source` 复用现有 `Skill.source`
 * `commit_sha` / `blob_hash` 记录 Git 来源
 * `content_hash` 记录内容稳定标识
 * `content_snapshot_ref` 用于指向当时的内容快照
 * `mutation_reason` 记录为什么产生这个版本
 * `mutation_diff` 记录相对父版本的变化
 ---
 ### ProjectGeneEvaluation
 ```text
 ProjectGeneEvaluation
  - evaluation_id
  - gene_id
  - revision_id
  - eval_name
  - evaluator
  - dataset_ref
  - dataset_version
  - metric_name
  - score
  - threshold
  - passed
  - sample_count
  - failure_count
  - latency_ms_avg
  - cost
  - result_summary
  - result_json
  - evaluated_at
 ```
 说明：
 * `revision_id` 必填
 * `score` 不应脱离 `metric_name` 单独解释
 * `threshold` 用于判断是否通过
 * `sample_count` 和 `failure_count` 用于判断评估可信度
 * `result_json` 存放详细结果、失败样本、分项指标等
 ---
 ### ProjectGeneSelection
 ```text
 ProjectGeneSelection
  - selection_id
  - gene_id
  - selected_revision_id
  - project_uuid
  - policy
  - reason
  - selected_by
  - selected_at
  - active
 ```
 说明：
 * 同一个 `gene_id` 同一时间只能有一个 active selection
 * `policy` 可以是 `manual`、`latest_passed`、`best_score`、`stable_low_cost` 等（完整列表见“选择策略”一节；初始化迁移和“编辑即生效”场景还会分别使用 `migration`、`manual_edit_auto_select`）
 * `reason` 用于审计和回滚
 * `selected_by` 可以是用户、系统或自动策略
 ---
 ## 关于 GeneLineage
 MVP 阶段不单独引入 `GeneLineage` 表。
 原因是：如果每个版本只有一个父版本，`ProjectGeneRevision.parent_revision_id` 已经足够表达谱系。
 ```text
 MVP：单父版本树
 Future：多父 DAG / merge / cross-skill inheritance
 ```
 未来如果需要支持合并、交叉继承或复杂谱系，再引入：
 ```text
 ProjectGeneEdge
  - parent_revision_id
  - child_revision_id
  - edge_type
  - mutation_reason
  - mutation_diff
 ```
 ---
 ## 关于 Variant
 `Variant` 是 Gene 的重要能力，但不一定是 MVP 的独立表。
 这些变化都可以先作为新的 `GeneRevision` 表达：
 * 更严格的 prompt
 * 更短的 prompt
 * 不同工具权限
 * 不同上下文注入规则
 * 不同评估阈值
 MVP 中：
 ```text
 不单独引入 ProjectGeneVariant。
 所有变体先作为 GeneRevision 表达。
 ```
 当系统需要以下能力时，再引入 `GeneExperiment` / `GeneVariant`：
 * A/B 实验
 * 并行流量分配
 * 实验分组
 * 统计显著性
 * 多候选版本同时比较
 ---
 ## Git 同步流程
 当前 Git 同步已经会扫描仓库中的 `SKILL.md`，并使用 `commit_sha` 和 `blob_hash` 做增量同步。
 引入 Gene 后，Git 同步流程建议变成：
 ```text
 当 Git 同步发现 SKILL.md 的 blob_hash 变化时：
 1. 正常创建或更新 project_skill。
 2. 查找该 skill 对应的 project_gene。
 3. 如果不存在 project_gene，则创建一个。
 4. 查找该 gene 下最新的 project_gene_revision。
 5. 如果新的 blob_hash / content_hash 不同，则创建新的 GeneRevision。
 6. 将上一 revision 设为 parent_revision_id。
 7. mutation_reason 默认记录为 git_sync。
 8. mutation_diff 记录上一个 SKILL.md 与当前 SKILL.md 的 diff。
 9. 不自动改变 selected_revision，除非选择策略明确允许。
 ```
 关键约束：
 ```text
 Git sync 可以产生新的 GeneRevision。
 Git sync 不应默认切换当前线上选中版本。
 ```
 这样可以避免仓库变更自动导致线上行为漂移。
 ---
 ## 手动编辑流程
 当用户在 UI 中编辑 `Skill` 内容时：
 ```text
 1. 更新 project_skill。
 2. 计算新的 content_hash。
 3. 查找对应 project_gene。
 4. 创建新的 project_gene_revision。
 5. parent_revision_id 指向编辑前的 revision。
 6. mutation_reason 记录为 manual_edit。
 7. mutation_diff 记录编辑前后的差异。
 8. 可选择是否自动把新 revision 标记为 selected。
 ```
 建议 MVP 默认：
 ```text
 手动编辑后创建新 revision，但不自动覆盖 active selection。
 由用户显式选择是否启用该 revision。
 ```
 如果产品希望“编辑即生效”，也可以让 selection 同步更新，但必须记录：
 ```text
 policy = manual_edit_auto_select
 reason = "User edited skill content"
 ```
 ---
 ## 聊天上下文注入流程
 现有流程中，`libs/agent/chat/message_builder.rs` 读取项目中启用的 `Skill`，并把技能注入对话上下文。
 引入 Gene 后，这个原则不变：
 ```text
 message_builder 不直接读取 Gene 内容。
 message_builder 仍然读取启用的 Skill。
 Gene 只影响哪个 Skill revision 被视为推荐版本或当前选中版本。
 ```
 如果未来要让 `GeneSelection` 影响上下文注入，推荐路径是：
 ```text
 GeneSelection
  -> resolve selected GeneRevision
  -> materialize / update project_skill
  -> message_builder reads project_skill
 ```
 不建议：
 ```text
 message_builder
  -> read Gene
  -> assemble prompt
 ```
 因为这会让 `Gene` 偷偷变成新的执行层。
 ---
 ## API 设计
 MVP API 可以包括：
 ```text
 GET    /projects/:project_uuid/skills/:skill_id/gene
 POST   /projects/:project_uuid/skills/:skill_id/gene
 GET    /projects/:project_uuid/genes/:gene_id/revisions
 POST   /projects/:project_uuid/genes/:gene_id/revisions
 GET    /projects/:project_uuid/genes/:gene_id/evaluations
 POST   /projects/:project_uuid/genes/:gene_id/evaluations
 GET    /projects/:project_uuid/genes/:gene_id/selection
 POST   /projects/:project_uuid/genes/:gene_id/select
 ```
 选择接口示例：
 ```json
 {
  "selected_revision_id": "rev_123",
  "policy": "manual",
  "reason": "Higher pass rate on regression eval"
 }
 ```
 评估写入接口示例：
 ```json
 {
  "revision_id": "rev_123",
  "eval_name": "skill_regression_eval",
  "dataset_ref": "datasets/skill-regression-v1",
  "dataset_version": "2026-01-01",
  "metric_name": "task_success_rate",
  "score": 0.92,
  "threshold": 0.85,
  "passed": true,
  "sample_count": 100,
  "failure_count": 8,
  "latency_ms_avg": 1200,
  "cost": 0.34
 }
 ```
 ---
 ## UI 设计
 `Gene` 不替代现有技能管理 UI。
 推荐把它放在 `Skill Detail` 页面中的一个“演化”标签页。
 ```text
 Skill Detail
  - 基本信息
  - 内容编辑
  - 启用状态
  - Evolution / Gene
      - 当前选中 revision
      - 版本列表
      - 父子关系
      - 每个版本的 diff
      - 每个版本的评估结果
      - 选择按钮
      - 回滚按钮
 ```
 MVP UI 可以先做只读：
 ```text
 1. 显示 Gene 信息
 2. 显示 Revision 列表
 3. 显示每个 Revision 的来源、时间、commit_sha、blob_hash
 4. 显示相邻 Revision 的 diff
 ```
 第二阶段再加入：
 ```text
 1. 手动选择 revision
 2. 回滚 revision
 3. 展示评估结果
 4. 根据评估结果推荐版本
 ```
 ---
 ## 评估指标
 Gene 的核心不是“更像生物学”，而是“更像可验证的演化对象”。
 每个 `GeneRevision` 都应该支持评估，例如：
 * 任务成功率
 * 失败率
 * 平均耗时
 * 误触发率
 * 人工接受率
 * 回滚率
 * 成本
 * 稳定性
 * 安全失败数
 * 用户满意度
 没有评估的 Gene，只是重命名后的 Skill 管理。
 ---
 ## 选择策略
 如果存在多个 GeneRevision，系统应该能选择更优版本。
 MVP 阶段不做自动选择，只做人工选择和可审计回滚。
 后续选择规则可以逐步加入：
 ```text
 1. 先看是否通过基础测试
 2. 再看近期成功率
 3. 再看失败率
 4. 再看成本
 5. 再看延迟
 6. 再看稳定性
 7. 再看人工接受率
 ```
 可选策略：
 ```text
 manual
 latest_passed
 best_score
 stable_low_cost
 lowest_latency
 highest_acceptance_rate
 ```
 自动选择必须满足：
 ```text
 有评估数据
 有样本量
 有失败分类
 有回滚机制
 有选择审计记录
 ```
 ---
 ## Migration 策略
 对于已有的 `project_skill`，可以执行一次初始化迁移：
 ```text
 对每个 project_skill：
 1. 创建 project_gene。
 2. 创建初始 project_gene_revision。
 3. revision.origin = migration。
 4. revision.skill_id = project_skill.id。
 5. revision.skill_slug = project_skill.slug。
 6. revision.commit_sha = project_skill.commit_sha。
 7. revision.blob_hash = project_skill.blob_hash。
 8. revision.content_hash = hash(project_skill.content)。
 9. revision.content_snapshot_ref = 当前 Skill 内容快照。
 10. 创建 active project_gene_selection。
 11. selected_revision_id = 初始 revision。
 12. policy = migration。
 13. reason = "Initial gene selection from existing project_skill"。
 ```
 这样现有 Skill 都可以无损进入 Gene 生命周期模型。
 ---
 ## 推荐实现顺序
 ```text
 1. 保持 Skill 执行、扫描、编辑、注入流程不变。
 2. 增加 project_gene 和 project_gene_revision 表。
 3. 为现有 project_skill 执行一次 migration：每个 Skill 创建一个 Gene 和初始 Revision。
 4. 在 Git sync 发现 blob_hash 变化时，自动创建新的 GeneRevision。
 5. 在 Skill 详情页增加只读版本历史和 diff 展示。
 6. 增加 ProjectGeneEvaluation 表和手动写入 API。
 7. 增加 ProjectGeneSelection 表和人工选择 / 回滚能力，并为第 3 步已迁移的 Gene 补建初始 active selection（对应 Migration 策略的第 10–13 步）。
 8. 当评估数据稳定后，再做自动选择策略。
 9. 最后再考虑 Variant / Experiment / A-B 测试。
 ```
 ---
 ## 落地判断标准
 一个能力如果只是：
 * 能被扫描
 * 能被编辑
 * 能被启用或禁用
 * 能被注入上下文
 * 能交付业务能力
 它还是 `Skill`。
 一个能力如果还能：
 * 被追踪来源
 * 被比较版本
 * 被记录变体
 * 被评估好坏
 * 被继承和淘汰
 * 被审计选择
 * 被安全回滚
 它才进入 `Gene` 管理范畴。
 ---
 ## 非目标
 Gene MVP 不做以下事情：
 ```text
 1. 不替换 project_skill。
 2. 不改变 SKILL.md 作为仓库技能事实来源的地位。
 3. 不直接参与聊天上下文构建。
 4. 不直接执行工具调用。
 5. 不自动改写 Skill 内容。
 6. 不在没有评估和回滚机制的情况下自动切换线上版本。
 7. 不一开始支持复杂遗传算法、交叉、随机变异或自动进化。
 8. 不新增一套和 Skill 并列的执行 UI。
 ```
 ---
 ## 风险与约束
 ### 1. Gene 变成另一个 Skill
 风险：
 ```text
 Gene 中也开始存 prompt、工具权限、上下文注入规则，并且聊天时直接读取 Gene。
 ```
 规避：
 ```text
 Gene 不存放执行主内容。
 执行内容仍然归 Skill 所有。
 GeneRevision 只引用或快照 Skill 的某个状态。
 ```
 ---
 ### 2. 评估绑定到可变 Skill
 风险：
 ```text
 评估结果只挂在 skill_id 上，Skill 内容变化后，评估记录失真。
 ```
 规避：
 ```text
 评估必须绑定 revision_id + content_hash / blob_hash。
 ```
 ---
 ### 3. 自动选择过早上线
 风险：
 ```text
 没有足够评估数据时自动切换版本，导致线上行为漂移。
 ```
 规避：
 ```text
 MVP 只做人工选择和可审计回滚。
 自动选择必须依赖稳定评估和回滚机制。
 ```
 ---
 ### 4. mutation_diff 膨胀
 风险：
 ```text
 每个版本都保存完整 diff，长期可能膨胀。
 ```
 规避：
 ```text
 MVP 可直接存 mutation_diff。
 后续引入 mutation_diff_ref 或对象存储引用。
 ```
 ---
 ### 5. version 语义不清
 风险：
 ```text
 version 同时承担用户展示、唯一标识、Git 版本等多种含义。
 ```
 规避：
 ```text
 revision_id = 系统内部唯一版本节点
 version = 用户可见版本号
 commit_sha / blob_hash = Git 来源版本标识
 content_hash = 内容稳定标识
 ```
 ---
 ## 最终结论
 这个项目已经具备 `Skill` 的完整工程闭环。
 `Gene` 的正确位置不是替换它，而是补上它缺失的演化层：
 ```text
 Skill 负责落地执行
 Gene 负责生命周期治理
 GeneRevision 负责不可变版本节点
 GeneEvaluation 负责版本质量判断
 GeneSelection 负责显式选择和回滚
 ```
 两者结合后，技能系统才从“可配置”变成“可进化”。
--- a/index.html
+++ b/index.html
@ -2,13 +2,12 @@
 <html lang="en">
  <head>
    <meta charset="UTF-8" />
-    <link href="/logo.png" rel="icon" type="image/svg+xml"/>
+    <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
-    <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>GitData.AI</title>
+    <title>GitDataAI</title>
    <link rel="preload" href="/@fs/node_modules/@fontsource-variable/geist/files/Geist%5Bwght%5D.woff2" as="font" type="font/woff2" crossorigin />
  </head>
  <body>
    <div id="root"></div>
-<script src="/src/main.tsx" type="module"></script>
+    <script type="module" src="/src/main.tsx"></script>
  </body>
 </html>
--- a/lib.rs
+++ b/lib.rs
@ -1 +0,0 @@
 // Frontend embedding is handled by the libs/frontend crate.
--- a/libs/agent/Cargo.toml
+++ b/libs/agent/Cargo.toml
@ -1,47 +0,0 @@
 [package]
 name = "agent"
 version.workspace = true
 edition.workspace = true
 authors.workspace = true
 description.workspace = true
 repository.workspace = true
 readme.workspace = true
 homepage.workspace = true
 license.workspace = true
 keywords.workspace = true
 categories.workspace = true
 documentation.workspace = true
 [lib]
 path = "lib.rs"
 name = "agent"
 [features]
 default = ["rig"]
 rig = []
 [dependencies]
 rig-core = { workspace = true, features = ["derive"] }
 tokio = { workspace = true }
 async-trait = { workspace = true }
 qdrant-client = { workspace = true }
 sea-orm = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 thiserror = { workspace = true }
 db = { workspace = true }
 config = { path = "../config" }
 models = { workspace = true }
 chrono = { workspace = true }
 uuid = { workspace = true, features = ["v7"] }
 futures = { workspace = true }
 tiktoken-rs = { workspace = true }
 once_cell = { workspace = true }
 regex = { workspace = true }
 tracing = { workspace = true }
 metrics = { workspace = true }
 rust_decimal = { workspace = true }
 reqwest = { workspace = true, features = ["json"] }
 utoipa = { workspace = true }
 tokio-stream = { workspace = true }
 redis = { workspace = true, features = ["tokio-comp"] }
 queue = { workspace = true }
 [lints]
 workspace = true
--- a/libs/agent/agent/mod.rs
+++ b/libs/agent/agent/mod.rs
@ -1,4 +0,0 @@
 //! Rig-based agent using rig's built-in Agent with full feature support.
 pub mod rig_tool;
 pub use rig_tool::{AgentResponse, RigAgentService, StreamChunk};
--- a/libs/agent/agent/rig_tool.rs
+++ b/libs/agent/agent/rig_tool.rs
@ -1,234 +0,0 @@
 use futures::Stream;
 use futures::StreamExt;
 use rig::{
    agent::{AgentBuilder, MultiTurnStreamItem},
    client::CompletionClient,
    completion::Prompt,
    streaming::{StreamedAssistantContent, StreamingPrompt},
 };
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
 use crate::client::AiClientConfig;
 use crate::error::AgentError;
 /// Result of a completed, non-streaming agent prompt.
 #[derive(Debug)]
 pub struct AgentResponse {
    /// Final text output returned by the agent.
    pub content: String,
    /// Input token count taken from the usage data of the response.
    pub input_tokens: u64,
    /// Output token count taken from the usage data of the response.
    pub output_tokens: u64,
 }
 /// One item emitted by the streaming prompt methods of `RigAgentService`.
 #[derive(Debug)]
 pub enum StreamChunk {
    /// An incremental piece of assistant text, forwarded as soon as it arrives.
    Text(String),
    /// Emitted when the underlying stream reports its final response.
    Final {
        /// Concatenation of every `Text` chunk forwarded so far.
        content: String,
        /// Input token count taken from the final response's usage data.
        input_tokens: u64,
        /// Output token count taken from the final response's usage data.
        output_tokens: u64,
    },
 }
 /// Service that runs prompts through a rig agent for one fixed model.
 pub struct RigAgentService {
    /// Client configuration; a rig client is built from it on every call.
    config: AiClientConfig,
    /// Name of the completion model requested from the client.
    model_name: String,
 }
 impl RigAgentService {
    /// Creates a service bound to `config` and the given model name.
    pub fn new(config: AiClientConfig, model_name: impl Into<String>) -> Self {
        Self {
            config,
            model_name: model_name.into(),
        }
    }
    /// Sends a single prompt (no tools) and waits for the complete answer.
    ///
    /// A rig client and agent are built for every call, with `system_prompt` as the
    /// agent preamble.
    ///
    /// # Errors
    /// Any rig `PromptError` is returned as `AgentError::OpenAi` carrying the error text.
    pub async fn prompt(
        &self,
        system_prompt: &str,
        user_input: &str,
    ) -> std::result::Result<AgentResponse, AgentError> {
        let client = self.config.build_rig_client();
        let model = client.completion_model(&self.model_name);
        let agent = AgentBuilder::new(model).preamble(system_prompt).build();
        let response = agent
            .prompt(user_input)
            .extended_details()
            .await
            .map_err(|e: rig::completion::PromptError| AgentError::OpenAi(e.to_string()))?;
        Ok(AgentResponse {
            content: response.output,
            input_tokens: response.usage.input_tokens,
            output_tokens: response.usage.output_tokens,
        })
    }
    /// Like [`Self::prompt`], but registers `tools` on the agent and allows up to
    /// `max_turns` turns.
    ///
    /// `max_turns` is applied both as the agent default and on the request itself.
    ///
    /// # Errors
    /// Any rig `PromptError` is returned as `AgentError::OpenAi` carrying the error text.
    pub async fn prompt_with_tools(
        &self,
        system_prompt: &str,
        user_input: &str,
        tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>>,
        max_turns: usize,
    ) -> std::result::Result<AgentResponse, AgentError> {
        let client = self.config.build_rig_client();
        let model = client.completion_model(&self.model_name);
        let agent = AgentBuilder::new(model)
            .preamble(system_prompt)
            .tools(tools)
            .default_max_turns(max_turns)
            .build();
        let response = agent
            .prompt(user_input)
            .max_turns(max_turns)
            .extended_details()
            .await
            .map_err(|e: rig::completion::PromptError| AgentError::OpenAi(e.to_string()))?;
        Ok(AgentResponse {
            content: response.output,
            input_tokens: response.usage.input_tokens,
            output_tokens: response.usage.output_tokens,
        })
    }
    /// Streams the answer to a single prompt (no tools).
    ///
    /// A background task forwards items from the rig stream into a bounded channel
    /// (capacity 100): text deltas become [`StreamChunk::Text`], the final response
    /// becomes [`StreamChunk::Final`] carrying the accumulated text and token usage,
    /// and stream errors become `Err(AgentError::OpenAi)`. Tool calls and tool results
    /// are only logged. The task stops as soon as the returned stream is dropped, so an
    /// abandoned request does not keep consuming the upstream response.
    ///
    /// # Errors
    /// This function itself currently always returns `Ok`; failures are delivered as
    /// `Err` items inside the stream.
    pub async fn stream_prompt(
        &self,
        system_prompt: &str,
        user_input: &str,
    ) -> std::result::Result<
        impl Stream<Item = std::result::Result<StreamChunk, AgentError>>,
        AgentError,
    > {
        let client = self.config.build_rig_client();
        let model = client.completion_model(&self.model_name);
        let agent = AgentBuilder::new(model).preamble(system_prompt).build();
        let stream: rig::agent::StreamingResult<_> = agent.stream_prompt(user_input).await;
        let (tx, rx) = mpsc::channel::<std::result::Result<StreamChunk, AgentError>>(100);
        tokio::spawn(async move {
            let mut final_content = String::new();
            tokio::pin!(stream);
            while let Some(item) = stream.next().await {
                match item {
                    Ok(MultiTurnStreamItem::StreamAssistantItem(
                        StreamedAssistantContent::Text(text),
                    )) => {
                        final_content.push_str(&text.text);
                        // A failed send means the receiver is gone: stop polling upstream.
                        if tx.send(Ok(StreamChunk::Text(text.text.clone()))).await.is_err() {
                            break;
                        }
                    }
                    Ok(MultiTurnStreamItem::StreamAssistantItem(
                        StreamedAssistantContent::ToolCall {
                            tool_call,
                            internal_call_id: _,
                        },
                    )) => {
                        // Arguments may arrive as a JSON string or as structured JSON.
                        let args_str = match &tool_call.function.arguments {
                            serde_json::Value::String(s) => s.clone(),
                            v => serde_json::to_string(v).unwrap_or_default(),
                        };
                        tracing::info!(
                            tool = %tool_call.function.name,
                            args = %args_str,
                            "rig_agent_streaming_tool_call"
                        );
                    }
                    Ok(MultiTurnStreamItem::StreamUserItem(
                        rig::streaming::StreamedUserContent::ToolResult { tool_result, .. },
                    )) => {
                        tracing::info!(
                            tool_result_id = %tool_result.id,
                            "rig_agent_streaming_tool_result"
                        );
                    }
                    Ok(MultiTurnStreamItem::FinalResponse(resp)) => {
                        let usage = resp.usage();
                        let final_chunk = StreamChunk::Final {
                            content: final_content.clone(),
                            input_tokens: usage.input_tokens,
                            output_tokens: usage.output_tokens,
                        };
                        if tx.send(Ok(final_chunk)).await.is_err() {
                            break;
                        }
                    }
                    Err(e) => {
                        if tx.send(Err(AgentError::OpenAi(e.to_string()))).await.is_err() {
                            break;
                        }
                    }
                    _ => {}
                }
            }
        });
        Ok(ReceiverStream::new(rx))
    }
    /// Streams the answer to a prompt with `tools` registered, allowing up to
    /// `max_turns` turns and starting from an empty history.
    ///
    /// A background task forwards text deltas as [`StreamChunk::Text`], the final
    /// response as [`StreamChunk::Final`] and stream errors as `Err(AgentError::OpenAi)`
    /// through a bounded channel (capacity 100); every other stream item is ignored.
    /// The task stops as soon as the returned stream is dropped, so an abandoned request
    /// does not keep driving further model turns.
    ///
    /// # Errors
    /// This function itself currently always returns `Ok`; failures are delivered as
    /// `Err` items inside the stream.
    pub async fn stream_prompt_with_tools(
        &self,
        system_prompt: &str,
        user_input: &str,
        tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>>,
        max_turns: usize,
    ) -> std::result::Result<
        impl Stream<Item = std::result::Result<StreamChunk, AgentError>>,
        AgentError,
    > {
        let client = self.config.build_rig_client();
        let model = client.completion_model(&self.model_name);
        let agent = AgentBuilder::new(model)
            .preamble(system_prompt)
            .tools(tools)
            .default_max_turns(max_turns)
            .build();
        let stream = agent
            .stream_prompt(user_input)
            .with_history(Vec::<rig::completion::Message>::new())
            .multi_turn(max_turns)
            .await;
        let (tx, rx) = mpsc::channel::<Result<StreamChunk, AgentError>>(100);
        tokio::spawn(async move {
            let mut final_content = String::new();
            tokio::pin!(stream);
            while let Some(item) = stream.next().await {
                match item {
                    Ok(MultiTurnStreamItem::StreamAssistantItem(
                        StreamedAssistantContent::Text(text),
                    )) => {
                        final_content.push_str(&text.text);
                        // A failed send means the receiver is gone: stop polling upstream.
                        if tx.send(Ok(StreamChunk::Text(text.text.clone()))).await.is_err() {
                            break;
                        }
                    }
                    Ok(MultiTurnStreamItem::FinalResponse(resp)) => {
                        let usage = resp.usage();
                        let final_chunk = StreamChunk::Final {
                            content: final_content.clone(),
                            input_tokens: usage.input_tokens,
                            output_tokens: usage.output_tokens,
                        };
                        if tx.send(Ok(final_chunk)).await.is_err() {
                            break;
                        }
                    }
                    Err(e) => {
                        if tx.send(Err(AgentError::OpenAi(e.to_string()))).await.is_err() {
                            break;
                        }
                    }
                    _ => {}
                }
            }
        });
        Ok(ReceiverStream::new(rx))
    }
    /// Counts the tokens of `text` for this service's model.
    ///
    /// # Errors
    /// A failure of the token counter is returned as `AgentError::Internal`.
    pub fn count_tokens(&self, text: &str) -> Result<usize, AgentError> {
        crate::tokent::count_text(text, &self.model_name)
            .map_err(|e| AgentError::Internal(e.to_string()))
    }
 }
--- a/libs/agent/billing.rs
+++ b/libs/agent/billing.rs
@ -1,668 +0,0 @@
 //! Billing service — handles user-level and project-level billing, deduction,
 //! credit initialization, and error persistence.
 //!
 //! Architecture:
 //!   - Each user gets $10 personal balance on signup.
 //!   - Each project gets $20 balance only if it's the creator's first project,
 //!     $0 otherwise.
 //!   - AI usage is deducted from the project balance first; if insufficient,
 //!     falls through to the user's personal balance.
 //!   - Monthly quota only applies to pro users (is_pro = true).
 //!   - If both project and user balance are insufficient, a billing_error
 //!     record is persisted and an error is returned to the caller.
 use db::database::AppDatabase;
 use models::agents::model_pricing;
 use models::ai::billing_error;
 use models::projects::{project, project_billing, project_billing_history};
 use models::users::{user_billing, user_billing_history};
 use rust_decimal::Decimal;
 use sea_orm::*;
 use uuid::Uuid;
 use crate::error::AgentError;
 /// Personal balance credited to a newly created user billing account: $10.0000.
 fn default_user_balance() -> Decimal {
    // 100_000 units at scale 4 is 10.0000.
    let (units, scale) = (100_000_i64, 4_u32);
    Decimal::new(units, scale)
 }
 /// Balance credited to the first project a user creates: $20.0000.
 fn first_project_credit() -> Decimal {
    // 200_000 units at scale 4 is 20.0000.
    let (units, scale) = (200_000_i64, 4_u32);
    Decimal::new(units, scale)
 }
 /// Starting balance for every project after the creator's first one: zero.
 const SUBSEQUENT_PROJECT_BALANCE: Decimal = Decimal::ZERO;
 /// Summary of one successfully billed AI call.
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
 pub struct BillingRecord {
    /// Amount charged, converted from the decimal cost to `f64`.
    pub cost: f64,
    /// Currency of the charge, as looked up for the model.
    pub currency: String,
    /// Number of input tokens that were billed.
    pub input_tokens: i64,
    /// Number of output tokens that were billed.
    pub output_tokens: i64,
    /// Which account paid for the call.
    pub deducted_from: String, // "project" or "user"
 }
 /// Outcome of an attempt to bill an AI call.
 #[derive(Debug)]
 pub enum BillingResult {
    /// The cost was deducted; the record describes the charge.
    Success(BillingRecord),
    /// No account could cover the cost; `message` is the reason reported by the
    /// failed deduction.
    InsufficientBalance { message: String },
 }
 /// Bills one AI call: the project balance is charged first, and the user's personal
 /// balance is charged in full only if the project deduction fails.
 ///
 /// Returns `BillingResult::Success` naming the account that paid, or
 /// `BillingResult::InsufficientBalance` when neither deduction succeeds. In the latter
 /// case a `billing_error` record is persisted first so the frontend can show it.
 ///
 /// # Errors
 /// Fails when the cost or currency lookup fails, when the project does not exist, or
 /// when the billing error cannot be persisted.
 pub async fn record_ai_usage(
    db: &AppDatabase,
    project_uid: Uuid,
    user_uid: Uuid,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
 ) -> Result<BillingResult, AgentError> {
    let total_cost = compute_cost(db, model_id, input_tokens, output_tokens).await?;
    let currency = get_currency(db, model_id).await?;
    // Only the existence of the project matters here; the row itself is not used.
    if project::Entity::find_by_id(project_uid)
        .one(db)
        .await?
        .is_none()
    {
        return Err(AgentError::Internal("Project not found".into()));
    }
    // First choice: charge the project account.
    let charged_project = deduct_from_project(
        db,
        project_uid,
        total_cost,
        &currency,
        model_id,
        input_tokens,
        output_tokens,
    )
    .await
    .is_ok();
    if charged_project {
        let cost = decimal_to_f64(total_cost);
        tracing::info!(
            project_id = %project_uid,
            model_id = %model_id,
            input_tokens, output_tokens,
            cost = %cost,
            currency = %currency,
            deducted_from = "project",
            "ai_usage_recorded"
        );
        return Ok(BillingResult::Success(BillingRecord {
            cost,
            currency,
            input_tokens,
            output_tokens,
            deducted_from: "project".to_string(),
        }));
    }
    // Any project-side failure falls through to the user's personal balance.
    let insufficient_msg = match deduct_from_user(
        db,
        user_uid,
        total_cost,
        &currency,
        project_uid,
        model_id,
        input_tokens,
        output_tokens,
    )
    .await
    {
        Ok(()) => {
            let cost = decimal_to_f64(total_cost);
            tracing::info!(
                user_id = %user_uid,
                project_id = %project_uid,
                model_id = %model_id,
                input_tokens, output_tokens,
                cost = %cost,
                currency = %currency,
                deducted_from = "user",
                "ai_usage_recorded"
            );
            return Ok(BillingResult::Success(BillingRecord {
                cost,
                currency,
                input_tokens,
                output_tokens,
                deducted_from: "user".to_string(),
            }));
        }
        Err(reason) => reason,
    };
    // Neither account could pay: persist the failure before reporting it.
    let details = serde_json::json!({
        "user_id": user_uid.to_string(),
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost": decimal_to_f64(total_cost),
        "currency": currency,
    });
    persist_billing_error(
        db,
        "project",
        project_uid,
        "insufficient_balance",
        &insufficient_msg,
        Some(details),
    )
    .await?;
    Ok(BillingResult::InsufficientBalance {
        message: insufficient_msg,
    })
 }
 /// Bills one personal (non-project) AI call against the user's own balance.
 ///
 /// Returns `BillingResult::Success` when the deduction succeeds. Otherwise a
 /// `billing_error` record scoped to the user is persisted and
 /// `BillingResult::InsufficientBalance` is returned.
 ///
 /// # Errors
 /// Fails when the cost or currency lookup fails, or when the billing error cannot be
 /// persisted.
 pub async fn record_user_ai_usage(
    db: &AppDatabase,
    user_uid: Uuid,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
 ) -> Result<BillingResult, AgentError> {
    let total_cost = compute_cost(db, model_id, input_tokens, output_tokens).await?;
    let currency = get_currency(db, model_id).await?;
    let deduction = deduct_from_user_personal(
        db,
        user_uid,
        total_cost,
        &currency,
        model_id,
        input_tokens,
        output_tokens,
    )
    .await;
    // Failure path first: record the error, then report insufficient balance.
    if let Err(insufficient_msg) = deduction {
        let details = serde_json::json!({
            "user_id": user_uid.to_string(),
            "model_id": model_id.to_string(),
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": decimal_to_f64(total_cost),
            "currency": currency,
            "scope": "personal",
        });
        persist_billing_error(
            db,
            "user",
            user_uid,
            "insufficient_balance",
            &insufficient_msg,
            Some(details),
        )
        .await?;
        return Ok(BillingResult::InsufficientBalance {
            message: insufficient_msg,
        });
    }
    let cost = decimal_to_f64(total_cost);
    tracing::info!(
        user_id = %user_uid,
        model_id = %model_id,
        input_tokens, output_tokens,
        cost = %cost,
        currency = %currency,
        deducted_from = "user",
        scope = "personal",
        "ai_usage_recorded"
    );
    Ok(BillingResult::Success(BillingRecord {
        cost,
        currency,
        input_tokens,
        output_tokens,
        deducted_from: "user".to_string(),
    }))
 }
 /// Checks whether a potential AI call can be paid for, given the estimated token counts.
 /// Called before starting AI processing to avoid wasted compute.
 ///
 /// `record_ai_usage` charges the whole cost to the project and, if that fails, the
 /// whole cost to the user; it never splits a charge across both accounts. The check
 /// therefore requires that at least one of the two balances covers the estimate on its
 /// own. Summing the balances would approve calls that cannot be billed afterwards.
 ///
 /// # Errors
 /// Fails when the cost of the model cannot be computed.
 pub async fn check_balance(
    db: &AppDatabase,
    project_uid: Uuid,
    user_uid: Uuid,
    model_id: Uuid,
    estimated_input_tokens: i64,
    estimated_output_tokens: i64,
 ) -> Result<bool, AgentError> {
    let estimated_cost = compute_cost(
        db,
        model_id,
        estimated_input_tokens,
        estimated_output_tokens,
    )
    .await?;
    let project_balance = get_project_balance(db, project_uid).await;
    let user_balance = get_user_balance(db, user_uid).await;
    // Mirrors the deduction order: project first, then the user's personal balance.
    Ok(project_balance >= estimated_cost || user_balance >= estimated_cost)
 }
 /// Reports whether the user's personal balance covers the estimated cost of an AI call.
 ///
 /// # Errors
 /// Fails when the cost of the model cannot be computed.
 pub async fn check_user_balance(
    db: &AppDatabase,
    user_uid: Uuid,
    model_id: Uuid,
    estimated_input_tokens: i64,
    estimated_output_tokens: i64,
 ) -> Result<bool, AgentError> {
    let required = compute_cost(
        db,
        model_id,
        estimated_input_tokens,
        estimated_output_tokens,
    )
    .await?;
    let available = get_user_balance(db, user_uid).await;
    let is_covered = available >= required;
    Ok(is_covered)
 }
 // ── Initialization ──
 /// Create the billing account row for a user.
 ///
 /// The starting balance comes from `default_user_balance()` (logged as "$10"),
 /// the currency is USD, and the account starts as non-pro with zeroed quota
 /// counters and no billing cycle. Called on user signup / first login.
 ///
 /// # Errors
 /// Returns `AgentError::Internal` when the insert fails.
 pub async fn initialize_user_billing(db: &AppDatabase, user_uid: Uuid) -> Result<(), AgentError> {
    let created = chrono::Utc::now();
    let account = user_billing::ActiveModel {
        user: Set(user_uid),
        balance: Set(default_user_balance()),
        currency: Set("USD".to_string()),
        is_pro: Set(false),
        monthly_quota: Set(Decimal::ZERO),
        month_used: Set(Decimal::ZERO),
        cycle_start: Set(None),
        cycle_end: Set(None),
        updated_at: Set(created),
        created_at: Set(created),
    };
    if let Err(e) = account.insert(db).await {
        return Err(AgentError::Internal(format!(
            "failed to create user billing: {}",
            e
        )));
    }
    tracing::info!(user_id = %user_uid, balance = "$10", "user_billing_initialized");
    Ok(())
 }
 /// Initialize a project billing account.
 ///
 /// The creator's first project receives `first_project_credit()` (logged as
 /// "$20"); any later project starts at `SUBSEQUENT_PROJECT_BALANCE` (logged as
 /// "$0"). "First" means the creator has no other project row besides
 /// `project_uid`.
 ///
 /// The billing account and, for a first project, the matching credit entry in
 /// the billing history are written inside a single transaction, so a granted
 /// credit can never exist without its ledger record (or vice versa).
 ///
 /// # Errors
 /// Returns `AgentError::Internal` when counting projects, opening or committing
 /// the transaction, or either insert fails. On failure nothing is persisted.
 pub async fn initialize_project_billing(
    db: &AppDatabase,
    project_uid: Uuid,
    creator_uid: Uuid,
 ) -> Result<(), AgentError> {
    // Check how many projects this user has already created
    let existing_count = project::Entity::find()
        .filter(project::Column::CreatedBy.eq(creator_uid))
        .filter(project::Column::Id.ne(project_uid))
        .count(db)
        .await
        .map_err(|e| AgentError::Internal(format!("failed to count user projects: {}", e)))?;
    let is_first = existing_count == 0;
    let initial_balance = if is_first {
        first_project_credit()
    } else {
        SUBSEQUENT_PROJECT_BALANCE
    };
    let now = chrono::Utc::now();
    // Both writes must succeed or fail together; dropping `txn` on an early
    // return discards whatever was written so far.
    let txn = db.begin().await.map_err(|e| {
        AgentError::Internal(format!("failed to begin project billing transaction: {}", e))
    })?;
    project_billing::ActiveModel {
        project: Set(project_uid),
        balance: Set(initial_balance),
        currency: Set("USD".to_string()),
        user: Set(Some(creator_uid)),
        initial_credit_granted: Set(is_first),
        is_pro: Set(false),
        monthly_quota: Set(Decimal::ZERO),
        month_used: Set(Decimal::ZERO),
        cycle_start: Set(None),
        cycle_end: Set(None),
        updated_at: Set(now),
        created_at: Set(now),
    }
    .insert(&txn)
    .await
    .map_err(|e| AgentError::Internal(format!("failed to create project billing: {}", e)))?;
    if is_first {
        // Record the credit in billing history. The amount reuses
        // `initial_balance` so the ledger always equals the granted balance.
        project_billing_history::ActiveModel {
            uid: Set(Uuid::new_v4()),
            project: Set(project_uid),
            user: Set(Some(creator_uid)),
            amount: Set(initial_balance),
            currency: Set("USD".to_string()),
            reason: Set("first_project_credit".to_string()),
            extra: Set(Some(serde_json::json!({
                "is_first_project": true,
            }))),
            created_at: Set(now),
            ..Default::default()
        }
        .insert(&txn)
        .await
        .map_err(|e| AgentError::Internal(format!("failed to record credit history: {}", e)))?;
    }
    txn.commit().await.map_err(|e| {
        AgentError::Internal(format!("failed to commit project billing: {}", e))
    })?;
    tracing::info!(
        project_id = %project_uid,
        creator_id = %creator_uid,
        is_first_project = is_first,
        balance = if is_first { "$20" } else { "$0" },
        "project_billing_initialized"
    );
    Ok(())
 }
 // ── Internal helpers ──
 /// Price a token count for a model using its most recent pricing row.
 ///
 /// Picks the pricing record for `model_id` with the greatest `EffectiveFrom`,
 /// parses its input/output prices as decimals, and returns
 /// `input_tokens / 1M * input_price + output_tokens / 1M * output_price`.
 ///
 /// NOTE(review): the columns are named `*_per_1k_tokens` but the arithmetic
 /// treats the stored values as per-1M-token prices — confirm which is intended.
 ///
 /// # Errors
 /// Returns an error when the query fails, when no pricing row exists, when a
 /// price cannot be parsed, or when neither price is greater than zero.
 async fn compute_cost(
    db: &AppDatabase,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
 ) -> Result<Decimal, AgentError> {
    let latest = model_pricing::Entity::find()
        .filter(model_pricing::Column::ModelVersionId.eq(model_id))
        .order_by_desc(model_pricing::Column::EffectiveFrom)
        .one(db)
        .await?;
    let pricing = match latest {
        Some(row) => row,
        None => {
            return Err(AgentError::Internal(
                "No pricing record found for this model. Please configure AI model pricing first."
                    .into(),
            ));
        }
    };
    let input_price = pricing
        .input_price_per_1k_tokens
        .parse::<Decimal>()
        .map_err(|e| AgentError::Internal(format!("Invalid input price: {}", e)))?;
    let output_price = pricing
        .output_price_per_1k_tokens
        .parse::<Decimal>()
        .map_err(|e| AgentError::Internal(format!("Invalid output price: {}", e)))?;
    // At least one of the two prices has to be positive for billing to make sense.
    let has_positive_price = input_price > Decimal::ZERO || output_price > Decimal::ZERO;
    if !has_positive_price {
        return Err(AgentError::Internal(
            "Model pricing is not configured or is zero. Please configure non-zero AI model pricing first."
                .into(),
        ));
    }
    // DB stores per-1M-token prices; divide tokens by 1M to compute cost.
    let per_million = Decimal::from(1_000_000);
    let input_cost = (Decimal::from(input_tokens) / per_million) * input_price;
    let output_cost = (Decimal::from(output_tokens) / per_million) * output_price;
    Ok(input_cost + output_cost)
 }
 /// Look up the billing currency for a model.
 ///
 /// Reads the currency from the pricing row with the greatest `EffectiveFrom`,
 /// i.e. the same row `compute_cost` prices against. Without the ordering, a
 /// model with several pricing rows could be priced from one row and labelled
 /// with the currency of another.
 ///
 /// # Errors
 /// Returns an error when the query fails or no pricing row exists for the model.
 async fn get_currency(db: &AppDatabase, model_id: Uuid) -> Result<String, AgentError> {
    let pricing = model_pricing::Entity::find()
        .filter(model_pricing::Column::ModelVersionId.eq(model_id))
        .order_by_desc(model_pricing::Column::EffectiveFrom)
        .one(db)
        .await?
        .ok_or_else(|| AgentError::Internal("No pricing found".into()))?;
    Ok(pricing.currency)
 }
 async fn get_project_balance(db: &AppDatabase, project_uid: Uuid) -> Decimal {
    project_billing::Entity::find_by_id(project_uid)
        .one(db)
        .await
        .ok()
        .flatten()
        .map(|b| b.balance)
        .unwrap_or(Decimal::ZERO)
 }
 async fn get_user_balance(db: &AppDatabase, user_uid: Uuid) -> Decimal {
    user_billing::Entity::find_by_id(user_uid)
        .one(db)
        .await
        .ok()
        .flatten()
        .map(|b| b.balance)
        .unwrap_or(Decimal::ZERO)
 }
 /// Charge `cost` to a project's billing account.
 ///
 /// Runs in its own transaction: the account row is read with `lock_exclusive`,
 /// the balance is checked, an `ai_usage` entry with a negative amount is
 /// appended to the project billing history, and the balance is reduced.
 ///
 /// # Errors
 /// Returns a human-readable message when the account is missing, when the
 /// balance is lower than `cost` (the transaction is rolled back first), or when
 /// any database step fails.
 async fn deduct_from_project(
    db: &AppDatabase,
    project_uid: Uuid,
    cost: Decimal,
    currency: &str,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
 ) -> Result<(), String> {
    let txn = db
        .begin()
        .await
        .map_err(|e| format!("db txn error: {}", e))?;
    let account = project_billing::Entity::find_by_id(project_uid)
        .lock_exclusive()
        .one(&txn)
        .await
        .map_err(|e| format!("db error: {}", e))?
        .ok_or_else(|| "Project billing account not found".to_string())?;
    let can_afford = account.balance >= cost;
    if !can_afford {
        txn.rollback().await.ok();
        return Err(format!(
            "Project balance insufficient. Required: {:.4} {}, Available: {:.4} {}",
            cost, currency, account.balance, currency
        ));
    }
    let timestamp = chrono::Utc::now();
    let usage = serde_json::json!({
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "deducted_from": "project",
    });
    // Ledger entry first, then the balance update, both inside `txn`.
    let ledger_entry = project_billing_history::ActiveModel {
        uid: Set(Uuid::new_v4()),
        project: Set(project_uid),
        user: Set(None),
        amount: Set(-cost),
        currency: Set(currency.to_string()),
        reason: Set("ai_usage".to_string()),
        extra: Set(Some(usage)),
        created_at: Set(timestamp),
        ..Default::default()
    };
    ledger_entry
        .insert(&txn)
        .await
        .map_err(|e| format!("failed to insert history: {}", e))?;
    let remaining = account.balance - cost;
    let mut row: project_billing::ActiveModel = account.into();
    row.balance = Set(remaining);
    row.updated_at = Set(timestamp);
    row.update(&txn)
        .await
        .map_err(|e| format!("failed to update balance: {}", e))?;
    txn.commit()
        .await
        .map_err(|e| format!("commit error: {}", e))
 }
 /// Charge `cost` for a project-scoped call to the user's personal account.
 ///
 /// Runs in its own transaction: the user's account row is read with
 /// `lock_exclusive`, the balance is checked, an `ai_usage_user_fallback` entry
 /// is appended to the *project* billing history (attributed to the user), and
 /// the user's balance is reduced.
 ///
 /// # Errors
 /// Returns a human-readable message when the account is missing, when the
 /// balance is lower than `cost` (the transaction is rolled back first), or when
 /// any database step fails.
 async fn deduct_from_user(
    db: &AppDatabase,
    user_uid: Uuid,
    cost: Decimal,
    currency: &str,
    project_uid: Uuid,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
 ) -> Result<(), String> {
    let txn = db
        .begin()
        .await
        .map_err(|e| format!("db txn error: {}", e))?;
    let account = user_billing::Entity::find_by_id(user_uid)
        .lock_exclusive()
        .one(&txn)
        .await
        .map_err(|e| format!("db error: {}", e))?
        .ok_or_else(|| "User billing account not found".to_string())?;
    let can_afford = account.balance >= cost;
    if !can_afford {
        txn.rollback().await.ok();
        return Err(format!(
            "Insufficient balance (project + user). Project: unavailable, User: {:.4} {}. Required: {:.4} {}",
            account.balance, currency, cost, currency
        ));
    }
    let timestamp = chrono::Utc::now();
    let usage = serde_json::json!({
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "deducted_from": "user",
    });
    // Record in project billing history (but deducted from user)
    let ledger_entry = project_billing_history::ActiveModel {
        uid: Set(Uuid::new_v4()),
        project: Set(project_uid),
        user: Set(Some(user_uid)),
        amount: Set(-cost),
        currency: Set(currency.to_string()),
        reason: Set("ai_usage_user_fallback".to_string()),
        extra: Set(Some(usage)),
        created_at: Set(timestamp),
        ..Default::default()
    };
    ledger_entry
        .insert(&txn)
        .await
        .map_err(|e| format!("failed to insert history: {}", e))?;
    let remaining = account.balance - cost;
    let mut row: user_billing::ActiveModel = account.into();
    row.balance = Set(remaining);
    row.updated_at = Set(timestamp);
    row.update(&txn)
        .await
        .map_err(|e| format!("failed to update user balance: {}", e))?;
    txn.commit()
        .await
        .map_err(|e| format!("commit error: {}", e))
 }
 /// Charge `cost` for a personal (non-project) call to the user's account.
 ///
 /// Runs in its own transaction: the user's account row is read with
 /// `lock_exclusive`, the balance is checked, an `ai_usage_personal` entry is
 /// appended to the user billing history, and the balance is reduced.
 ///
 /// The insufficient-balance message reports the account's own currency, while
 /// the history entry records the `currency` argument.
 ///
 /// # Errors
 /// Returns a human-readable message when the account is missing, when the
 /// balance is lower than `cost` (the transaction is rolled back first), or when
 /// any database step fails.
 async fn deduct_from_user_personal(
    db: &AppDatabase,
    user_uid: Uuid,
    cost: Decimal,
    currency: &str,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
 ) -> Result<(), String> {
    let txn = db
        .begin()
        .await
        .map_err(|e| format!("db txn error: {}", e))?;
    let account = user_billing::Entity::find_by_id(user_uid)
        .lock_exclusive()
        .one(&txn)
        .await
        .map_err(|e| format!("db error: {}", e))?
        .ok_or_else(|| "User billing account not found".to_string())?;
    let can_afford = account.balance >= cost;
    if !can_afford {
        txn.rollback().await.ok();
        return Err(format!(
            "Insufficient balance. User: {:.4} {}. Required: {:.4} {}",
            account.balance, account.currency, cost, account.currency
        ));
    }
    let timestamp = chrono::Utc::now();
    let usage = serde_json::json!({
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "deducted_from": "user",
        "scope": "personal",
    });
    let ledger_entry = user_billing_history::ActiveModel {
        uid: Set(Uuid::new_v4()),
        user: Set(user_uid),
        amount: Set(-cost),
        currency: Set(currency.to_string()),
        reason: Set("ai_usage_personal".to_string()),
        extra: Set(Some(usage)),
        created_at: Set(timestamp),
        ..Default::default()
    };
    ledger_entry
        .insert(&txn)
        .await
        .map_err(|e| format!("failed to insert user history: {}", e))?;
    let remaining = account.balance - cost;
    let mut row: user_billing::ActiveModel = account.into();
    row.balance = Set(remaining);
    row.updated_at = Set(timestamp);
    row.update(&txn)
        .await
        .map_err(|e| format!("failed to update user balance: {}", e))?;
    txn.commit()
        .await
        .map_err(|e| format!("commit error: {}", e))
 }
 /// Store a billing failure as an unresolved `billing_error` row and log it.
 ///
 /// * `scope` / `scope_id` — what the error is attached to (this file passes
 ///   `"user"` together with a user id).
 /// * `error_type` — short machine-readable category, e.g. `"insufficient_balance"`.
 /// * `message` — human-readable description.
 /// * `details` — optional structured payload stored alongside the message.
 ///
 /// # Errors
 /// Returns `AgentError::Internal` when the insert fails.
 pub async fn persist_billing_error(
    db: &AppDatabase,
    scope: &str,
    scope_id: Uuid,
    error_type: &str,
    message: &str,
    details: Option<serde_json::Value>,
 ) -> Result<(), AgentError> {
    let record = billing_error::ActiveModel {
        id: Set(Uuid::new_v4()),
        scope: Set(scope.to_string()),
        scope_id: Set(scope_id),
        error_type: Set(error_type.to_string()),
        message: Set(message.to_string()),
        details: Set(details),
        resolved: Set(false),
        created_at: Set(chrono::Utc::now()),
    };
    if let Err(e) = record.insert(db).await {
        return Err(AgentError::Internal(format!(
            "failed to persist billing error: {}",
            e
        )));
    }
    tracing::warn!(scope, %scope_id, error_type, "billing_error_persisted");
    Ok(())
 }
 /// Convert a decimal amount to `f64` for reporting.
 ///
 /// The value is rounded to 10 decimal places, rendered as text and re-parsed;
 /// if the text cannot be parsed as a float the result is `0.0`.
 fn decimal_to_f64(d: Decimal) -> f64 {
    let rendered = d.round_dp(10).to_string();
    match rendered.parse::<f64>() {
        Ok(value) => value,
        Err(_) => 0.0,
    }
 }
--- a/libs/agent/chat/chat_execution.rs
+++ b/libs/agent/chat/chat_execution.rs
--- a/libs/agent/chat/mod.rs
+++ b/libs/agent/chat/mod.rs
@ -1,163 +0,0 @@
 use std::pin::Pin;
 use config::AppConfig;
 use db::cache::AppCache;
 use db::database::AppDatabase;
 use models::agents::model;
 use models::projects::{project, project_context_setting};
 use models::repos::repo;
 use models::rooms::{room, room_message};
 use models::users::user;
 use std::collections::HashMap;
 use uuid::Uuid;
 /// Maximum recursion rounds for tool-call loops (AI → tool → result → AI).
 /// Previous default of 3 caused frequent silent termination on realistic multi-step queries.
 ///
 /// Per-request limits are carried separately in `AiRequest::max_tool_depth` and
 /// `AgentExecutionProfile::max_tool_depth`; where this default is applied to
 /// them is not visible in this module.
 pub const DEFAULT_MAX_TOOL_DEPTH: usize = 99;
 /// A single chunk from an AI streaming response.
 ///
 /// Chunks are handed to a [`StreamCallback`] one at a time.
 #[derive(Debug, Clone)]
 pub struct AiStreamChunk {
    /// Text payload of this chunk; its meaning depends on `chunk_type`.
    pub content: String,
    /// Completion flag for the stream.
    /// NOTE(review): presumably `true` only on the final chunk — confirm against the producers.
    pub done: bool,
    /// What kind of content this chunk contains — helps the frontend render
    /// thinking, tool calls, and results with different styles.
    pub chunk_type: AiChunkType,
    /// Structured metadata for tool_call / tool_result events.
    /// tool_call:  {"tool": "...", "args": {...}}
    /// tool_result: {"tool": "...", "status": "ok|error", "result": "..."}
    pub metadata: Option<serde_json::Value>,
    /// Optional ID of a child process/agent, sent to frontend via SSE.
    pub children_id: Option<String>,
 }
 /// Type of streaming chunk, used by the frontend for rendering.
 ///
 /// The default variant is [`AiChunkType::Answer`].
 #[derive(Debug, Clone, Default, PartialEq, Eq)]
 pub enum AiChunkType {
    /// AI reasoning/thinking text before a tool call or answer.
    Thinking,
    /// Final answer text from the AI.
    #[default]
    Answer,
    /// A tool call is being executed (content = tool name + args summary).
    ToolCall,
    /// Tool execution result (content = result or error).
    ToolResult,
 }
 // Tag markers are spelled with `\x3c` / `\x3e` escapes so the literal tags never
 // appear verbatim in this source file.
 const THINK_OPEN: &str = "\x3cthinking\x3e";
 // NOTE(review): this marker does not pair with `THINK_OPEN`; it is kept as-is so
 // existing behavior is preserved — confirm whether it is still needed.
 const THINK_CLOSE: &str = "\x3c/response\x3e";
 /// Further reasoning-tag spellings that must be stripped: the closing tag that
 /// matches `THINK_OPEN`, and the short open/close pair emitted by DeepSeek-R1.
 const EXTRA_THINK_TAGS: [&str; 3] = [
    "\x3c/thinking\x3e",
    "\x3cthink\x3e",
    "\x3c/think\x3e",
 ];
 /// Strip XML-format thinking tags that some models (e.g. DeepSeek-R1) embed
 /// in reasoning output. Also normalizes excessive consecutive newlines (3+ → 2).
 ///
 /// Complete tags are removed first, then the two partial fragments the original
 /// implementation already handled (an opening tag missing its closing bracket
 /// and a closing marker missing its opening bracket). The result is trimmed of
 /// leading and trailing whitespace. Only the tag markers are removed; the text
 /// between them is kept.
 pub fn normalize_thinking_content(content: &str) -> String {
    let mut stripped = content.replace(THINK_CLOSE, "").replace(THINK_OPEN, "");
    for tag in EXTRA_THINK_TAGS.iter().copied() {
        // Skip the allocation when the tag is absent, which is the common case.
        if stripped.contains(tag) {
            stripped = stripped.replace(tag, "");
        }
    }
    let stripped = stripped
        .replace("\x3cthinking", "")
        .replace("/response\x3e", "");
    let mut result = String::with_capacity(stripped.len());
    let mut newline_run = 0usize;
    for ch in stripped.chars() {
        if ch == '\n' {
            newline_run += 1;
            // Keep at most two newlines of any consecutive run.
            if newline_run <= 2 {
                result.push(ch);
            }
        } else {
            newline_run = 0;
            result.push(ch);
        }
    }
    result.trim().to_string()
 }
 /// Boxed callback invoked with each [`AiStreamChunk`] of a streaming response.
 ///
 /// The callback returns a pinned, boxed `Send` future that resolves to `()`.
 /// The callback itself must be `Send + Sync`.
 pub type StreamCallback = Box<
    dyn Fn(AiStreamChunk) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync,
 >;
 /// Role an agent plays during execution.
 ///
 /// The default role is [`AgentRole::Default`].
 #[derive(Debug, Clone, Default, PartialEq, Eq)]
 pub enum AgentRole {
    #[default]
    Default,
    Supervisor,
    Researcher,
    Analyst,
    Reviewer,
    Architect,
    Debugger,
    Implementer,
    Tester,
    Security,
 }
 /// Per-role overrides applied on top of an [`AiRequest`].
 ///
 /// Every `Option` field left as `None` means "keep the request's own value";
 /// the orchestrator falls back to the request for unset sampling parameters
 /// and tool depth. `Default::default()` yields the `Default` role with no
 /// overrides and orchestration enabled.
 #[derive(Debug, Clone, Default)]
 pub struct AgentExecutionProfile {
    pub role: AgentRole,
    pub system_prompt: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i32>,
    pub top_p: Option<f64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    pub max_tool_depth: Option<usize>,
    /// Names of the tools this profile may use. `None` means no restriction
    /// is expressed by the profile.
    pub allowed_tools: Option<Vec<String>>,
    /// When `true`, the orchestrator skips delegation and runs the request directly.
    pub disable_orchestration: bool,
 }
 /// Everything needed to run one AI request: service handles, the entities
 /// involved, the user input with its history, and the generation settings.
 #[derive(Clone)]
 pub struct AiRequest {
    /// Database handle.
    pub db: AppDatabase,
    /// Cache handle.
    pub cache: AppCache,
    /// Application configuration.
    pub config: AppConfig,
    /// AI model the request runs against.
    pub model: model::Model,
    /// Project the request belongs to.
    pub project: project::Model,
    /// Optional project-level context settings.
    pub context_setting: Option<project_context_setting::Model>,
    /// User who sent the message.
    pub sender: user::Model,
    /// Room the message was sent in.
    pub room: room::Model,
    /// The user's input text.
    pub input: String,
    /// Users and repos mentioned in the input.
    pub mention: Vec<Mention>,
    /// Earlier room messages supplied as conversation history.
    pub history: Vec<room_message::Model>,
    /// NOTE(review): presumably the message sequence number at which `history`
    /// was cut off — confirm against the code that builds the request.
    pub history_cutoff_seq: Option<i64>,
    /// Display names keyed by user id.
    pub user_names: HashMap<Uuid, String>,
    // Sampling parameters. An execution profile may override any of them.
    pub temperature: f64,
    pub max_tokens: i32,
    pub top_p: f64,
    pub frequency_penalty: f64,
    pub presence_penalty: f64,
    /// NOTE(review): looks like a switch for model reasoning/thinking output — confirm.
    pub think: bool,
    /// Tool definitions offered to the model, as JSON values.
    pub tools: Option<Vec<serde_json::Value>>,
    /// Limit on tool-call recursion rounds for this request.
    pub max_tool_depth: usize,
    /// Optional role-specific overrides for this request.
    pub execution_profile: Option<AgentExecutionProfile>,
    /// Optional room-specific preamble text.
    pub room_preamble: Option<String>,
 }
 /// An entity mentioned in a request's input.
 #[derive(Clone)]
 pub enum Mention {
    /// A mentioned user.
    User(user::Model),
    /// A mentioned repository.
    Repo(repo::Model),
 }
 pub mod agent_profile;
 pub mod chat_execution;
 pub mod context;
 pub mod message_builder;
 pub mod nonstreaming_execution;
 pub mod orchestrator;
 pub mod react_execution;
 pub mod service;
 pub mod session_recording;
 pub mod state;
 pub mod streaming_execution;
 pub use context::{AiContextSenderType, RoomMessageContext};
 pub use service::ChatService;
 pub use state::{AgentRuntime, AgentState};
--- a/libs/agent/chat/orchestrator.rs
+++ b/libs/agent/chat/orchestrator.rs
@ -1,317 +0,0 @@
 use std::collections::HashMap;
 use super::agent_profile::{profile_for_role_name, should_enable_delegation, supervisor_profile};
 use super::message_builder::MessageBuilder;
 use super::nonstreaming_execution::execute_process;
 use super::service::{ProcessResult, StreamResult};
 use super::{AiRequest, StreamCallback};
 use crate::error::Result;
 use crate::tool::call::ToolError;
 use crate::tool::registry::ToolRegistry;
 use crate::tool::{ToolDefinition, ToolHandler, ToolParam, ToolSchema};
 /// Run a non-streaming request, delegating to sub-agents when appropriate.
 ///
 /// The request is executed directly via `execute_process` when its execution
 /// profile disables orchestration, or when `should_enable_delegation` rejects
 /// delegation for this input. Otherwise the request is turned into a
 /// supervisor request: the tool registry is extended with `call_sub_agent`,
 /// the supervisor profile is attached, and any sampling parameter the profile
 /// sets replaces the request's own value.
 ///
 /// # Errors
 /// Propagates whatever `execute_process` returns.
 pub async fn execute_orchestrated_process(
    request: AiRequest,
    message_builder: &MessageBuilder,
    tool_registry: &Option<ToolRegistry>,
    ai_base_url: Option<String>,
    ai_api_key: Option<String>,
 ) -> Result<ProcessResult> {
    let orchestration_disabled = request
        .execution_profile
        .as_ref()
        .is_some_and(|p| p.disable_orchestration);
    // Only emptiness matters here, so inspect the tool list in place instead of
    // cloning it.
    let has_tools = request.tools.as_ref().is_some_and(|t| !t.is_empty());
    // Short-circuit keeps the original order: the delegation heuristic is not
    // consulted when orchestration is disabled.
    if orchestration_disabled || !should_enable_delegation(&request.input, has_tools) {
        return execute_process(
            request,
            message_builder,
            tool_registry,
            ai_base_url,
            ai_api_key,
        )
        .await;
    }
    let mut enhanced_registry = tool_registry.clone().unwrap_or_default();
    register_call_sub_agent_tool(
        &mut enhanced_registry,
        &request,
        message_builder,
        tool_registry,
        ai_base_url.clone(),
        ai_api_key.clone(),
    );
    // The tool handler captured its own copy of the request above, so the
    // original can be reused as the supervisor request without another clone.
    let mut supervisor_request = request;
    let profile = supervisor_profile();
    supervisor_request.execution_profile = Some(profile.clone());
    supervisor_request.tools = Some(enhanced_registry.to_openai_tools());
    if let Some(temperature) = profile.temperature {
        supervisor_request.temperature = temperature;
    }
    if let Some(max_tokens) = profile.max_tokens {
        supervisor_request.max_tokens = max_tokens;
    }
    if let Some(top_p) = profile.top_p {
        supervisor_request.top_p = top_p;
    }
    if let Some(frequency_penalty) = profile.frequency_penalty {
        supervisor_request.frequency_penalty = frequency_penalty;
    }
    if let Some(presence_penalty) = profile.presence_penalty {
        supervisor_request.presence_penalty = presence_penalty;
    }
    execute_process(
        supervisor_request,
        message_builder,
        &Some(enhanced_registry),
        ai_base_url,
        ai_api_key,
    )
    .await
 }
 /// Run a streaming request, delegating to sub-agents when appropriate.
 ///
 /// The request is executed directly via `execute_process_stream` when its
 /// execution profile disables orchestration, or when
 /// `should_enable_delegation` rejects delegation for this input. Otherwise the
 /// request is turned into a supervisor request: the tool registry is extended
 /// with `call_sub_agent`, the supervisor profile is attached, and any sampling
 /// parameter the profile sets replaces the request's own value. Chunks are
 /// delivered through `on_chunk` in both cases.
 ///
 /// # Errors
 /// Propagates whatever `execute_process_stream` returns.
 pub async fn execute_orchestrated_stream(
    request: AiRequest,
    on_chunk: StreamCallback,
    message_builder: &MessageBuilder,
    tool_registry: &Option<ToolRegistry>,
    ai_base_url: Option<String>,
    ai_api_key: Option<String>,
 ) -> Result<StreamResult> {
    let orchestration_disabled = request
        .execution_profile
        .as_ref()
        .is_some_and(|p| p.disable_orchestration);
    // Only emptiness matters here, so inspect the tool list in place instead of
    // cloning it.
    let has_tools = request.tools.as_ref().is_some_and(|t| !t.is_empty());
    // Short-circuit keeps the original order: the delegation heuristic is not
    // consulted when orchestration is disabled.
    if orchestration_disabled || !should_enable_delegation(&request.input, has_tools) {
        return super::streaming_execution::execute_process_stream(
            request,
            on_chunk,
            message_builder,
            tool_registry,
            ai_base_url,
            ai_api_key,
        )
        .await;
    }
    let mut enhanced_registry = tool_registry.clone().unwrap_or_default();
    register_call_sub_agent_tool(
        &mut enhanced_registry,
        &request,
        message_builder,
        tool_registry,
        ai_base_url.clone(),
        ai_api_key.clone(),
    );
    // The tool handler captured its own copy of the request above, so the
    // original can be reused as the supervisor request without another clone.
    let mut supervisor_request = request;
    let profile = supervisor_profile();
    supervisor_request.execution_profile = Some(profile.clone());
    supervisor_request.tools = Some(enhanced_registry.to_openai_tools());
    if let Some(temperature) = profile.temperature {
        supervisor_request.temperature = temperature;
    }
    if let Some(max_tokens) = profile.max_tokens {
        supervisor_request.max_tokens = max_tokens;
    }
    if let Some(top_p) = profile.top_p {
        supervisor_request.top_p = top_p;
    }
    if let Some(frequency_penalty) = profile.frequency_penalty {
        supervisor_request.frequency_penalty = frequency_penalty;
    }
    if let Some(presence_penalty) = profile.presence_penalty {
        supervisor_request.presence_penalty = presence_penalty;
    }
    super::streaming_execution::execute_process_stream(
        supervisor_request,
        on_chunk,
        message_builder,
        &Some(enhanced_registry),
        ai_base_url,
        ai_api_key,
    )
    .await
 }
 /// Register the `call_sub_agent` tool on `registry`.
 ///
 /// The tool takes two string arguments, `role` and `task`. When invoked it
 /// builds a sub-request from a clone of `request`: the input is replaced by a
 /// prompt containing the role, the task and the original user input; the
 /// role's profile (from `profile_for_role_name`) is attached; the tool list is
 /// narrowed with `filter_tools_for_sub_agent`; and tool depth plus sampling
 /// parameters are taken from the profile, falling back to the request's values.
 /// The sub-request is executed with `execute_process` against
 /// `original_registry` (not the registry this tool is being added to).
 ///
 /// On success the tool returns a JSON object with `role`, `output`,
 /// `input_tokens` and `output_tokens`; on failure it returns
 /// `ToolError::ExecutionError`.
 fn register_call_sub_agent_tool(
    registry: &mut ToolRegistry,
    request: &AiRequest,
    message_builder: &MessageBuilder,
    original_registry: &Option<ToolRegistry>,
    ai_base_url: Option<String>,
    ai_api_key: Option<String>,
 ) {
    // Owned copies moved into the handler closure below.
    let captured_request = request.clone();
    let captured_message_builder = message_builder.clone();
    let captured_original_registry = original_registry.clone();
    let captured_base_url = ai_base_url;
    let captured_api_key = ai_api_key;
    registry.register(
        ToolDefinition::new("call_sub_agent")
            .description(
                "Delegate a task to a specialist sub-agent and receive its output.\n\
                 Available roles:\n\
                 - researcher: Gathers facts, evidence, and data. Best for finding information and searching code.\n\
                 - analyst: Builds explanations, highlights causal links and tradeoffs. Best for reasoning about implications.\n\
                 - reviewer: Stress-tests proposals, identifies risks and contradictions. Best for quality checks.\n\
                 - architect: Maps systems, dependencies, boundaries, and design tradeoffs. Best for architecture decisions.\n\
                 - debugger: Finds root causes, suspect changes, and validation paths. Best for bugs and regressions.\n\
                 - implementer: Converts requirements into concrete implementation steps. Best for execution planning.\n\
                 - tester: Designs validation and regression coverage. Best for test strategy.\n\
                 - security: Reviews auth, data exposure, injection, dependency, and abuse risks. Best for sensitive changes.\n\
                 Provide a clear, focused task description so the sub-agent knows exactly what to investigate.",
            )
            .parameters(ToolSchema {
                schema_type: "object".into(),
                properties: Some({
                    let mut p = HashMap::new();
                    p.insert(
                        "role".into(),
                        ToolParam {
                            name: "role".into(),
                            param_type: "string".into(),
                            description: Some(
                                "The sub-agent role to delegate to: researcher, analyst, reviewer, architect, debugger, implementer, tester, or security.".into(),
                            ),
                            required: true,
                            properties: None,
                            items: None,
                        },
                    );
                    p.insert(
                        "task".into(),
                        ToolParam {
                            name: "task".into(),
                            param_type: "string".into(),
                            description: Some(
                                "The specific task or question for the sub-agent. Be precise and focused.".into(),
                            ),
                            required: true,
                            properties: None,
                            items: None,
                        },
                    );
                    p
                }),
                required: Some(vec!["role".into(), "task".into()]),
            }),
        ToolHandler::new(move |_ctx, args| {
            // Extract owned values from args before async move (avoid borrowing across boundary)
            // A missing or non-string `role` falls back to "researcher".
            let role = args
                .get("role")
                .and_then(|v| v.as_str())
                .unwrap_or("researcher")
                .to_owned();
            // A missing or non-string `task` becomes an empty string.
            let task = args
                .get("task")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_owned();
            let profile = profile_for_role_name(role.as_str());
            let mut sub_request = captured_request.clone();
            sub_request.input = format!(
                "Sub-agent role: {role}\n\nTask:\n{task}\n\nOriginal user request:\n{}\n\nInstructions:\nFocus only on your assigned task. Return concise, evidence-backed findings.",
                captured_request.input
            );
            sub_request.execution_profile = Some(profile.clone());
            // The advertised tool list never includes `call_sub_agent`.
            sub_request.tools = Some(filter_tools_for_sub_agent(
                &captured_request.tools,
                &profile.allowed_tools,
            ));
            // Profile values win; unset ones fall back to the captured request.
            sub_request.max_tool_depth = profile
                .max_tool_depth
                .unwrap_or(captured_request.max_tool_depth);
            sub_request.temperature = profile.temperature.unwrap_or(captured_request.temperature);
            sub_request.max_tokens = profile.max_tokens.unwrap_or(captured_request.max_tokens);
            sub_request.top_p = profile.top_p.unwrap_or(captured_request.top_p);
            sub_request.frequency_penalty = profile
                .frequency_penalty
                .unwrap_or(captured_request.frequency_penalty);
            sub_request.presence_penalty = profile
                .presence_penalty
                .unwrap_or(captured_request.presence_penalty);
            // Clone captured values for this invocation so the Fn closure retains them
            let mb = captured_message_builder.clone();
            let sub_registry = captured_original_registry.clone();
            let base = captured_base_url.clone();
            let key = captured_api_key.clone();
            Box::pin(async move {
                let result = execute_process(sub_request, &mb, &sub_registry, base, key).await;
                match result {
                    Ok(r) => Ok(serde_json::json!({
                        "role": role,
                        "output": r.content,
                        "input_tokens": r.input_tokens,
                        "output_tokens": r.output_tokens,
                    })),
                    Err(e) => Err(ToolError::ExecutionError(format!(
                        "Sub-agent '{}' execution failed: {}",
                        role, e
                    ))),
                }
            })
        }),
    );
 }
 /// Select the tool definitions a sub-agent may see.
 ///
 /// `call_sub_agent` is never passed through, which prevents recursive
 /// delegation. Tool names are read from each definition's `function.name`.
 ///
 /// * No original tools: returns an empty list.
 /// * A non-empty allow-list (after dropping `call_sub_agent` from it): keeps
 ///   only tools whose name appears in the list.
 /// * No allow-list, or one that is empty after that removal: keeps every named
 ///   tool except `call_sub_agent`.
 fn filter_tools_for_sub_agent(
    original_tools: &Option<Vec<serde_json::Value>>,
    allowed_tools: &Option<Vec<String>>,
 ) -> Vec<serde_json::Value> {
    /// Name of a tool definition, if it has a string `function.name`.
    fn tool_name(tool: &serde_json::Value) -> Option<&str> {
        tool.get("function")?.get("name")?.as_str()
    }

    let tools = match original_tools {
        Some(list) => list,
        None => return Vec::new(),
    };
    // Allow-list with the delegation tool removed; empty when none was given.
    let whitelist: Vec<&String> = allowed_tools
        .iter()
        .flatten()
        .filter(|n| n.as_str() != "call_sub_agent")
        .collect();
    tools
        .iter()
        .filter(|tool| {
            if whitelist.is_empty() {
                tool_name(tool).is_some_and(|name| name != "call_sub_agent")
            } else {
                // An unnamed tool is compared as the empty string.
                let name = tool_name(tool).unwrap_or("");
                whitelist.iter().any(|allowed| allowed.as_str() == name)
            }
        })
        .cloned()
        .collect()
 }
--- a/libs/agent/chat/react_execution.rs
+++ b/libs/agent/chat/react_execution.rs
@ -1,233 +0,0 @@
 use futures::StreamExt;
 use models::rooms::room_ai;
 use rig::agent::{AgentBuilder, MultiTurnStreamItem};
 use rig::client::CompletionClient;
 use rig::streaming::{StreamedAssistantContent, StreamingPrompt};
 use sea_orm::*;
 use uuid::Uuid;
 use super::AiRequest;
 use super::session_recording::record_ai_session;
 use crate::client::AiClientConfig;
 use crate::error::{AgentError, Result};
 use crate::react::types::Action as ReactAction;
 use crate::react::{DEFAULT_SYSTEM_PROMPT, ReactStep};
 use crate::tool::{RecordingTool, registry::ToolRegistry};
 /// Run one request through a rig multi-turn streaming agent and report each step.
 ///
 /// * `request` - supplies the db/cache/config handles, room, sender, project, model,
 ///   prompt input and `max_tool_depth`.
 /// * `on_chunk` - awaited once for every emitted [`ReactStep`].
 /// * `tool_registry` - every definition that has a handler is exposed to the agent.
 /// * `ai_base_url` / `ai_api_key` - client settings; the base URL falls back to
 ///   `https://api.openai.com` and the key to an empty string.
 /// * `room_preamble` - optional text appended after `DEFAULT_SYSTEM_PROMPT`.
 /// * `message_producer` - optional queue producer handed to each tool adapter.
 ///
 /// Returns `(answer_text, input_tokens, output_tokens)`.
 ///
 /// # Errors
 /// Returns `AgentError::OpenAi` on the first stream error. That return happens
 /// before `record_ai_session` is reached, so failed runs are not recorded here.
 pub async fn execute_process_react<C, Fut>(
    request: &AiRequest,
    mut on_chunk: C,
    tool_registry: &ToolRegistry,
    ai_base_url: Option<String>,
    ai_api_key: Option<String>,
    room_preamble: Option<&str>,
    message_producer: Option<queue::MessageProducer>,
 ) -> Result<(String, i64, i64)>
 where
    C: FnMut(ReactStep) -> Fut + Send,
    Fut: std::future::Future<Output = ()> + Send,
 {
    let base_url = ai_base_url.unwrap_or_else(|| "https://api.openai.com".into());
    let api_key = ai_api_key.unwrap_or_default();
    let client_config = AiClientConfig::new(api_key).with_base_url(base_url);
    let db = request.db.clone();
    let cache = request.cache.clone();
    let cfg = request.config.clone();
    let room_id = request.room.id;
    let sender_uid = request.sender.uid;
    let project_id = request.project.id;
    let ai_model_id = request.model.id;
    let ai_model_name = request.model.name.clone();
    // One shared list handed to every tool adapter created for this run.
    let sent_in_turn = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
    let session_id = Uuid::now_v7();
    let session_start = std::time::Instant::now();
    // Best-effort lookup: a query error or a missing row both leave `version_id` as `None`.
    let version_id = room_ai::Entity::find()
        .filter(room_ai::Column::Room.eq(request.room.id))
        .filter(room_ai::Column::Model.eq(request.model.id))
        .one(&request.db)
        .await
        .ok()
        .flatten()
        .and_then(|r| r.version);
    // Wrap every registered handler in a rig adapter, then in a `RecordingTool`
    // bound to this session. Definitions without a handler are skipped.
    let mut tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>> = Vec::new();
    for def in tool_registry.definitions() {
        let name = def.name.clone();
        if let Some(handler) = tool_registry.get(&name) {
            let adapter = crate::tool::RigToolAdapter::new(
                handler.clone(),
                def.clone(),
                db.clone(),
                cache.clone(),
                cfg.clone(),
                room_id,
                Some(sender_uid),
                project_id,
                message_producer.clone(),
                Some(ai_model_id),
                Some(ai_model_name.clone()),
                sent_in_turn.clone(),
            );
            tools.push(Box::new(RecordingTool::new(
                Box::new(adapter),
                db.clone(),
                session_id,
                sender_uid,
            )));
        }
    }
    let rig_client = client_config.build_rig_client();
    let model = rig_client.completion_model(&request.model.name);
    // General rules first (strong LLM attention), room context appended after
    // so that output-format rules aren't buried behind long room preamble.
    let preamble = match room_preamble {
        Some(rp) => format!("{}\n{}", DEFAULT_SYSTEM_PROMPT, rp),
        None => DEFAULT_SYSTEM_PROMPT.to_string(),
    };
    let agent = AgentBuilder::new(model)
        .preamble(&preamble)
        .tools(tools)
        .default_max_turns(request.max_tool_depth)
        .build();
    // The stream is started with an empty history; only `request.input` is sent as the prompt.
    let stream = agent
        .stream_prompt(&request.input)
        .with_history(Vec::<rig::completion::Message>::new())
        .multi_turn(request.max_tool_depth)
        .await;
    tokio::pin!(stream);
    let mut step_count = 0usize;
    let mut final_content = String::new();
    let mut total_input_tokens: i64 = 0;
    let mut total_output_tokens: i64 = 0;
    while let Some(item) = stream.next().await {
        match item {
            // Answer text: forwarded as its own step and appended to the final content.
            Ok(MultiTurnStreamItem::StreamAssistantItem(StreamedAssistantContent::Text(text))) => {
                step_count += 1;
                let t = text.text;
                on_chunk(ReactStep::Answer {
                    step: step_count,
                    answer: t.clone(),
                })
                .await;
                final_content.push_str(&t);
            }
            // Reasoning block: only its text parts are joined; empty results emit no step.
            Ok(MultiTurnStreamItem::StreamAssistantItem(StreamedAssistantContent::Reasoning(
                reasoning,
            ))) => {
                let reasoning_text: String = reasoning
                    .content
                    .iter()
                    .filter_map(|c| match c {
                        rig::completion::message::ReasoningContent::Text { text, .. } => {
                            Some(text.as_str())
                        }
                        _ => None,
                    })
                    .collect::<Vec<_>>()
                    .join("");
                if !reasoning_text.is_empty() {
                    step_count += 1;
                    on_chunk(ReactStep::Thought {
                        step: step_count,
                        thought: reasoning_text,
                    })
                    .await;
                }
            }
            // Reasoning delta: each non-empty delta is reported as a separate thought step.
            Ok(MultiTurnStreamItem::StreamAssistantItem(
                StreamedAssistantContent::ReasoningDelta { reasoning, .. },
            )) => {
                if !reasoning.is_empty() {
                    step_count += 1;
                    on_chunk(ReactStep::Thought {
                        step: step_count,
                        thought: reasoning,
                    })
                    .await;
                }
            }
            Ok(MultiTurnStreamItem::StreamAssistantItem(StreamedAssistantContent::ToolCall {
                tool_call,
                ..
            })) => {
                step_count += 1;
                // String arguments are parsed as JSON (falling back to `Null` when
                // they do not parse); any other value is passed through unchanged.
                let args: serde_json::Value = match &tool_call.function.arguments {
                    serde_json::Value::String(s) => {
                        serde_json::from_str(s).unwrap_or(serde_json::Value::Null)
                    }
                    v => v.clone(),
                };
                on_chunk(ReactStep::Action {
                    step: step_count,
                    action: ReactAction::new(&tool_call.function.name, args),
                })
                .await;
            }
            Ok(MultiTurnStreamItem::StreamUserItem(
                rig::streaming::StreamedUserContent::ToolResult { tool_result, .. },
            )) => {
                step_count += 1;
                let obs = tool_result_content_to_string(&tool_result.content);
                on_chunk(ReactStep::Observation {
                    step: step_count,
                    observation: obs,
                })
                .await;
            }
            // Usage is assigned (not accumulated) from the final response.
            Ok(MultiTurnStreamItem::FinalResponse(resp)) => {
                let usage = resp.usage();
                total_input_tokens = usage.input_tokens as i64;
                total_output_tokens = usage.output_tokens as i64;
            }
            Err(e) => {
                let err_msg = format!("rig agent stream error: {}", e);
                return Err(AgentError::OpenAi(err_msg));
            }
            // Any other stream item is ignored.
            _ => {}
        }
    }
    let elapsed_ms = session_start.elapsed().as_millis() as i64;
    record_ai_session(
        &request.cache,
        &request.db,
        request.project.id,
        request.sender.uid,
        session_id,
        request.room.id,
        request.model.id,
        version_id.unwrap_or_default(),
        total_input_tokens,
        total_output_tokens,
        elapsed_ms,
    )
    .await;
    Ok((final_content, total_input_tokens, total_output_tokens))
 }
 /// Flatten a rig tool result into plain text.
 ///
 /// Only `ToolResultContent::Text` parts are kept (other variants, such as
 /// images, are skipped); the kept parts are joined with a newline.
 fn tool_result_content_to_string(
    content: &rig::one_or_many::OneOrMany<rig::completion::message::ToolResultContent>,
 ) -> String {
    use rig::completion::message::ToolResultContent;
    let mut text_parts: Vec<&str> = Vec::new();
    for part in content.iter() {
        if let ToolResultContent::Text(t) = part {
            text_parts.push(&t.text);
        }
    }
    text_parts.join("\n")
 }
--- a/libs/agent/chat/service.rs
+++ b/libs/agent/chat/service.rs
@ -1,268 +0,0 @@
 use super::message_builder::MessageBuilder;
 use super::{AiRequest, StreamCallback};
 use crate::client::AiClientConfig;
 use crate::client::StreamChunk;
 use crate::compact::CompactService;
 use crate::embed::EmbedService;
 use crate::error::Result;
 use crate::perception::PerceptionService;
 use crate::tool::registry::ToolRegistry;
 use queue::MessageProducer;
 /// Result from streaming AI response.
 ///
 /// Returned by the streaming entry points of `ChatService`
 /// (`process_stream`, `process_room_stream`).
 pub struct StreamResult {
    /// Answer text produced by the run.
    pub content: String,
    /// Reasoning text, kept separate from `content`.
    pub reasoning_content: String,
    /// Input token count reported for the run.
    pub input_tokens: i64,
    /// Output token count reported for the run.
    pub output_tokens: i64,
    /// All chunks in arrival order — preserves ReAct multi-cycle ordering.
    pub chunks: Vec<StreamChunk>,
 }
 /// Result from non-streaming AI response.
 ///
 /// Returned by the non-streaming entry points of `ChatService`
 /// (`process`, `process_room`).
 pub struct ProcessResult {
    /// Answer text produced by the run.
    pub content: String,
    /// Input token count reported for the run.
    pub input_tokens: i64,
    /// Output token count reported for the run.
    pub output_tokens: i64,
 }
 /// Service for handling AI chat requests in rooms.
 ///
 /// Built with `ChatService::new()` and configured through the `with_*` builder methods.
 pub struct ChatService {
    /// Base URL of the AI endpoint; `None` until `with_ai_client_config` supplies one.
    ai_base_url: Option<String>,
    /// API key for the AI endpoint; `None` until `with_ai_client_config` is called.
    ai_api_key: Option<String>,
    /// Builds the message context; also carries the compact/embed/perception services.
    message_builder: MessageBuilder,
    /// Base tool registry; `None` until `with_tool_registry` is called.
    tool_registry: Option<ToolRegistry>,
 }
 impl ChatService {
    /// Create a service with no AI client settings, a fresh `MessageBuilder`
    /// and no tool registry. Configure it with the `with_*` methods.
    pub fn new() -> Self {
        Self {
            ai_base_url: None,
            ai_api_key: None,
            message_builder: MessageBuilder::new(),
            tool_registry: None,
        }
    }
    /// Copy the base URL and API key out of `config`.
    pub fn with_ai_client_config(mut self, config: AiClientConfig) -> Self {
        self.ai_base_url = config.base_url.clone();
        self.ai_api_key = Some(config.api_key.clone());
        self
    }
    /// Attach a compact service to the message builder.
    pub fn with_compact_service(mut self, compact_service: CompactService) -> Self {
        self.message_builder = self.message_builder.with_compact_service(compact_service);
        self
    }
    /// Attach an embed service to the message builder.
    pub fn with_embed_service(mut self, embed_service: EmbedService) -> Self {
        self.message_builder = self.message_builder.with_embed_service(embed_service);
        self
    }
    /// Attach a perception service to the message builder.
    pub fn with_perception_service(mut self, perception_service: PerceptionService) -> Self {
        self.message_builder = self
            .message_builder
            .with_perception_service(perception_service);
        self
    }
    /// Set the base tool registry, replacing any previous one.
    pub fn with_tool_registry(mut self, registry: ToolRegistry) -> Self {
        self.tool_registry = Some(registry);
        self
    }
    /// Returns all registered tools as JSON tool definitions.
    ///
    /// Empty when no registry has been set.
    pub fn tools(&self) -> Vec<serde_json::Value> {
        self.tool_registry
            .as_ref()
            .map(|r| r.to_openai_tools())
            .unwrap_or_default()
    }
    /// Build a RigToolSet from the registered tool registry.
    ///
    /// This enables using the same tools with `RigAgentService` via rig's native Agent.
    /// The context (db, cache, config, room_id, sender_id) is passed through to each
    /// tool handler at creation time.
    ///
    /// Returns `None` when no registry has been set.
    #[cfg(feature = "rig")]
    pub fn rig_toolset(
        &self,
        db: db::database::AppDatabase,
        cache: db::cache::AppCache,
        config: config::AppConfig,
        room_id: uuid::Uuid,
        sender_id: Option<uuid::Uuid>,
        project_id: uuid::Uuid,
    ) -> Option<crate::RigToolSet> {
        self.tool_registry.as_ref().map(|registry| {
            crate::RigToolSet::from_registry(
                registry,
                db,
                cache,
                config,
                room_id,
                sender_id,
                project_id,
                None,
                None,
                None,
                std::sync::Arc::new(std::sync::Mutex::new(Vec::new())),
            )
        })
    }
    /// Get a reference to the underlying ToolRegistry.
    pub fn tool_registry(&self) -> Option<&ToolRegistry> {
        self.tool_registry.as_ref()
    }
    /// Delegate to `MessageBuilder::build_room_optimized_context_text` for `request`.
    pub async fn build_room_optimized_context_text(
        &self,
        request: &AiRequest,
    ) -> Result<(String, Option<i64>)> {
        self.message_builder
            .build_room_optimized_context_text(request)
            .await
    }
    /// Process AI request without streaming (tool-call loop with non-streaming API).
    pub async fn process(&self, request: AiRequest) -> Result<ProcessResult> {
        super::orchestrator::execute_orchestrated_process(
            request,
            &self.message_builder,
            &self.tool_registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }
    /// Process AI request with streaming (tool-call loop with streaming API, incremental chunks).
    pub async fn process_stream(
        &self,
        request: AiRequest,
        on_chunk: StreamCallback,
    ) -> Result<StreamResult> {
        super::orchestrator::execute_orchestrated_stream(
            request,
            on_chunk,
            &self.message_builder,
            &self.tool_registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }
    /// Process AI request for room context — direct execution path (bypasses orchestrator).
    ///
    /// Room AI uses a fast single-agent loop: all tools available, no multi-agent delegation.
    /// Merges `room_tools` (send_message, retract_message) into a clone of the base
    /// registry (or an empty one when none is set), then runs `execute_process` directly.
    pub async fn process_room(
        &self,
        request: AiRequest,
        room_tools: ToolRegistry,
    ) -> Result<ProcessResult> {
        let mut merged = self.tool_registry.clone().unwrap_or_default();
        merged.merge(room_tools);
        super::nonstreaming_execution::execute_process(
            request,
            &self.message_builder,
            &Some(merged),
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }
    /// Process AI request for room context with streaming — direct execution path.
    ///
    /// Same as `process_room` but with streaming response. Bypasses orchestrator,
    /// gives the room AI all tools (base + room) for fast single-agent execution.
    pub async fn process_room_stream(
        &self,
        request: AiRequest,
        on_chunk: StreamCallback,
        room_tools: ToolRegistry,
    ) -> Result<StreamResult> {
        let mut merged = self.tool_registry.clone().unwrap_or_default();
        merged.merge(room_tools);
        super::streaming_execution::execute_process_stream(
            request,
            on_chunk,
            &self.message_builder,
            &Some(merged),
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }
    /// Process AI request via rig-based ReAct streaming loop.
    ///
    /// Uses the base registry only, with no room preamble and no message producer.
    ///
    /// # Errors
    /// Returns `AgentError::Internal` when no tool registry has been set; otherwise
    /// propagates the result of `execute_process_react`.
    pub async fn process_react<C, Fut>(
        &self,
        request: &AiRequest,
        on_chunk: C,
    ) -> Result<(String, i64, i64)>
    where
        C: FnMut(crate::react::ReactStep) -> Fut + Send,
        Fut: std::future::Future<Output = ()> + Send,
    {
        let Some(registry) = &self.tool_registry else {
            return Err(crate::error::AgentError::Internal(
                "no tool registry registered".into(),
            ));
        };
        super::react_execution::execute_process_react(
            request,
            on_chunk,
            registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
            None,
            None,
        )
        .await
    }
    /// Process AI request via rig-based ReAct streaming loop with room-specific tools.
    ///
    /// Merges `room_tools` (e.g. `send_message`, `retract_message`) into a clone of
    /// the base tool registry on-the-fly. The `room_preamble` is appended after the
    /// default system prompt to instruct the AI about room communication rules.
    /// `message_producer` enables tools to publish events via the message queue.
    ///
    /// # Errors
    /// Returns `AgentError::Internal` when no tool registry has been set; otherwise
    /// propagates the result of `execute_process_react`.
    pub async fn process_react_room<C, Fut>(
        &self,
        request: &AiRequest,
        on_chunk: C,
        room_tools: ToolRegistry,
        room_preamble: Option<&str>,
        message_producer: Option<MessageProducer>,
    ) -> Result<(String, i64, i64)>
    where
        C: FnMut(crate::react::ReactStep) -> Fut + Send,
        Fut: std::future::Future<Output = ()> + Send,
    {
        let Some(registry) = &self.tool_registry else {
            return Err(crate::error::AgentError::Internal(
                "no tool registry registered".into(),
            ));
        };
        let mut merged = registry.clone();
        merged.merge(room_tools);
        super::react_execution::execute_process_react(
            request,
            on_chunk,
            &merged,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
            room_preamble,
            message_producer,
        )
        .await
    }
 }
 /// `ChatService::default()` is equivalent to `ChatService::new()`.
 impl Default for ChatService {
    fn default() -> Self {
        Self::new()
    }
 }
--- a/libs/agent/chat/state.rs
+++ b/libs/agent/chat/state.rs
@ -1,217 +0,0 @@
 //! Agent state machine — tracks lifecycle of a single AI agent invocation.
 //!
 //! States: Idle → Thinking → ToolCall → Thinking → ... → Answering | Error
 //! The Thinking ↔ ToolCall cycle repeats until max tool depth or final answer.
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
 /// Current phase of an agent's execution lifecycle.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub enum AgentState {
    /// Agent is idle, waiting for input
    Idle,
    /// Agent is reasoning/thinking (may produce thinking chunks)
    Thinking {
        /// When this thinking phase was entered.
        started_at: DateTime<Utc>,
        /// Tool calls completed so far when this phase was entered (0 on the first one).
        tool_depth: u32,
    },
    /// Agent is executing a tool call
    ToolCall {
        /// Name of the tool being executed.
        tool_name: String,
        /// When the tool call was started.
        started_at: DateTime<Utc>,
    },
    /// Agent is returning the final answer
    Answering {
        /// Accumulated answer content so far (running total kept by
        /// `AgentRuntime::append_answer`; starts at 0).
        content_chars: u64,
        /// When answering was started.
        started_at: DateTime<Utc>,
    },
    /// Agent encountered a non-recoverable error
    ///
    /// `tool_depth` is the number of tool calls completed when the failure was recorded.
    Error { message: String, tool_depth: u32 },
 }
 impl AgentState {
    pub fn is_terminal(&self) -> bool {
        matches!(
            self,
            AgentState::Answering { .. } | AgentState::Error { .. }
        )
    }
    pub fn is_idle(&self) -> bool {
        matches!(self, AgentState::Idle)
    }
    pub fn current_phase(&self) -> &'static str {
        match self {
            AgentState::Idle => "idle",
            AgentState::Thinking { .. } => "thinking",
            AgentState::ToolCall { .. } => "tool_call",
            AgentState::Answering { .. } => "answering",
            AgentState::Error { .. } => "error",
        }
    }
 }
 /// State machine for agent lifecycle transitions.
 pub struct AgentRuntime {
    /// Current lifecycle state; starts as `AgentState::Idle`.
    state: AgentState,
    /// Upper bound on completed tool calls; `start_tool_call` fails once it is reached.
    max_tool_depth: u32,
    /// Number of tool calls completed so far in this invocation.
    current_depth: u32,
 }
 impl AgentRuntime {
    /// Create a runtime in `Idle` that allows at most `max_tool_depth` completed tool calls.
    pub fn new(max_tool_depth: u32) -> Self {
        Self {
            state: AgentState::Idle,
            max_tool_depth,
            current_depth: 0,
        }
    }
    /// Borrow the current state.
    pub fn state(&self) -> &AgentState {
        &self.state
    }
    /// Transition from Idle → Thinking
    ///
    /// Resets the tool depth to 0. Calling this from another state is only
    /// caught by a `debug_assert!`; release builds perform the transition anyway.
    pub fn start_thinking(&mut self) {
        debug_assert!(self.state.is_idle(), "must be Idle to start thinking");
        self.current_depth = 0;
        self.state = AgentState::Thinking {
            started_at: Utc::now(),
            tool_depth: 0,
        };
    }
    /// Transition from Thinking → ToolCall
    ///
    /// # Errors
    /// Fails when the current state is not `Thinking`, or when the number of
    /// completed tool calls has already reached `max_tool_depth`. The state is
    /// left unchanged on failure.
    pub fn start_tool_call(&mut self, tool_name: String) -> Result<(), &'static str> {
        if !matches!(self.state, AgentState::Thinking { .. }) {
            return Err("must be Thinking to start tool call");
        }
        if self.current_depth >= self.max_tool_depth {
            return Err("max tool depth reached");
        }
        self.state = AgentState::ToolCall {
            tool_name,
            started_at: Utc::now(),
        };
        Ok(())
    }
    /// Transition from ToolCall → Thinking (back after tool result)
    ///
    /// Increments the tool depth; the new depth is stored in the `Thinking` state.
    ///
    /// # Errors
    /// Fails, leaving the state unchanged, when the current state is not `ToolCall`.
    pub fn complete_tool_call(&mut self) -> Result<(), &'static str> {
        if !matches!(self.state, AgentState::ToolCall { .. }) {
            return Err("must be ToolCall to complete");
        }
        self.current_depth += 1;
        self.state = AgentState::Thinking {
            started_at: Utc::now(),
            tool_depth: self.current_depth,
        };
        Ok(())
    }
    /// Transition to Answering (terminal)
    ///
    /// Accepted from any state; the character counter starts at 0.
    pub fn start_answer(&mut self) {
        self.state = AgentState::Answering {
            content_chars: 0,
            started_at: Utc::now(),
        };
    }
    /// Add `content` to the running character count of the answer.
    ///
    /// Does nothing unless the current state is `Answering`.
    pub fn append_answer(&mut self, content: &str) {
        if let AgentState::Answering { content_chars, .. } = &mut self.state {
            // Count characters (Unicode scalar values), not UTF-8 bytes: `str::len`
            // would over-count any non-ASCII answer text.
            *content_chars += content.chars().count() as u64;
        }
    }
    /// Transition to Error (terminal)
    ///
    /// Accepted from any state; records the tool depth reached so far.
    pub fn fail(&mut self, message: String) {
        self.state = AgentState::Error {
            message,
            tool_depth: self.current_depth,
        };
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Build a runtime that has already moved from `Idle` to `Thinking`.
    fn thinking_runtime(max_tool_depth: u32) -> AgentRuntime {
        let mut runtime = AgentRuntime::new(max_tool_depth);
        runtime.start_thinking();
        runtime
    }
    #[test]
    fn test_starts_idle() {
        let runtime = AgentRuntime::new(10);
        let state = runtime.state();
        assert!(state.is_idle());
        assert_eq!(state.current_phase(), "idle");
    }
    #[test]
    fn test_idle_to_thinking() {
        let runtime = thinking_runtime(10);
        assert_eq!(runtime.state().current_phase(), "thinking");
        assert!(!runtime.state().is_terminal());
    }
    #[test]
    fn test_thinking_to_tool_call_and_back() {
        let mut runtime = thinking_runtime(10);
        runtime.start_tool_call("search".into()).unwrap();
        assert_eq!(runtime.state().current_phase(), "tool_call");
        runtime.complete_tool_call().unwrap();
        assert_eq!(runtime.state().current_phase(), "thinking");
    }
    #[test]
    fn test_thinking_to_answer() {
        let mut runtime = thinking_runtime(10);
        runtime.start_answer();
        assert_eq!(runtime.state().current_phase(), "answering");
        assert!(runtime.state().is_terminal());
    }
    #[test]
    fn test_append_answer_tracks_chars() {
        let mut runtime = thinking_runtime(10);
        runtime.start_answer();
        runtime.append_answer("hello");
        let AgentState::Answering { content_chars, .. } = runtime.state() else {
            panic!("expected Answering state");
        };
        assert_eq!(*content_chars, 5);
    }
    #[test]
    fn test_error_is_terminal() {
        let mut runtime = thinking_runtime(10);
        runtime.fail("something broke".into());
        assert_eq!(runtime.state().current_phase(), "error");
        assert!(runtime.state().is_terminal());
    }
    #[test]
    fn test_transition_from_wrong_state() {
        let mut runtime = AgentRuntime::new(10);
        // Neither tool-call transition is legal while still Idle.
        let started = runtime.start_tool_call("tool".into());
        assert!(started.is_err());
        let completed = runtime.complete_tool_call();
        assert!(completed.is_err());
    }
    #[test]
    fn test_max_depth_rejected() {
        let mut runtime = thinking_runtime(2);
        // Two full tool-call cycles use up the whole budget.
        for tool in ["tool1", "tool2"] {
            runtime.start_tool_call(tool.into()).unwrap();
            runtime.complete_tool_call().unwrap();
        }
        assert!(runtime.start_tool_call("tool3".into()).is_err());
    }
 }
--- a/libs/agent/chat/streaming_execution.rs
+++ b/libs/agent/chat/streaming_execution.rs
@ -1,511 +0,0 @@
 use models::projects::project_skill;
 use models::rooms::room_ai;
 use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
 use std::pin::Pin;
 use std::sync::Arc;
 use uuid::Uuid;
 use super::message_builder::MessageBuilder;
 use super::service::StreamResult;
 use super::session_recording::record_ai_session;
 use super::{AiChunkType, AiRequest, AiStreamChunk, StreamCallback};
 use crate::client::AiClientConfig;
 use crate::client::types::{ChatRequestMessage, ToolCall};
 use crate::client::{StreamChunk, StreamChunkType, StreamedToolCall, call_stream};
 use crate::error::Result;
 use crate::perception::{SkillEntry, ToolCallEvent};
 use crate::tool::{ToolCall as AgentToolCall, ToolContext, ToolExecutor};
 /// Reference-counted, thread-safe chunk callback.
 ///
 /// Takes an `AiStreamChunk` and returns a boxed future to await. Being an `Arc`,
 /// it can be cloned into each of the per-delta closures handed to `call_stream`.
 type SharedCallback = Arc<
    dyn Fn(AiStreamChunk) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync,
 >;
 pub async fn execute_process_stream(
    request: AiRequest,
    on_chunk: StreamCallback,
    message_builder: &MessageBuilder,
    tool_registry: &Option<crate::tool::registry::ToolRegistry>,
    ai_base_url: Option<String>,
    ai_api_key: Option<String>,
 ) -> Result<StreamResult> {
    let on_chunk: SharedCallback = Arc::from(on_chunk);
    let tools: Vec<serde_json::Value> = request.tools.clone().unwrap_or_default();
    let tools_enabled = !tools.is_empty();
    let max_tool_depth = request.max_tool_depth;
    let mut messages = message_builder.build_messages(&request).await?;
    let room_ai_config = room_ai::Entity::find()
        .filter(room_ai::Column::Room.eq(request.room.id))
        .filter(room_ai::Column::Model.eq(request.model.id))
        .one(&request.db)
        .await?;
    let model_name = request.model.name.clone();
    let profile = request.execution_profile.as_ref();
    let temperature = profile
        .and_then(|p| p.temperature.map(|v| v as f32))
        .or_else(|| {
            room_ai_config
                .as_ref()
                .and_then(|r| r.temperature.map(|v| v as f32))
        })
        .unwrap_or(request.temperature as f32);
    let max_tokens = profile
        .and_then(|p| p.max_tokens.map(|v| v as u32))
        .or_else(|| {
            room_ai_config
                .as_ref()
                .and_then(|r| r.max_tokens.map(|v| v as u32))
        })
        .unwrap_or(request.max_tokens as u32);
    let mut tool_depth = 0;
    let mut total_input_tokens = 0i64;
    let mut total_output_tokens = 0i64;
    let session_id = Uuid::now_v7();
    let session_start = std::time::Instant::now();
    let version_id = room_ai_config.as_ref().and_then(|r| r.version);
    let config = AiClientConfig::new(ai_api_key.unwrap_or_default())
        .with_base_url(ai_base_url.unwrap_or_else(|| "https://api.openai.com".into()));
    let mut full_content = String::new();
    let mut all_chunks: Vec<StreamChunk> = Vec::new();
    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<StreamedToolCall>();
    loop {
        let on_chunk_cb = on_chunk.clone();
        let on_chunk_cb2 = on_chunk.clone();
        let tx_arc = Arc::new(tx.clone());
        let tx_arc2 = tx_arc.clone();
        let response = call_stream(
            &messages,
            &model_name,
            &config,
            temperature,
            max_tokens,
            if tools_enabled { Some(&tools) } else { None },
            None,
            Arc::new(move |delta| {
                let content = delta.to_string();
                let fut = on_chunk_cb(AiStreamChunk {
                    content,
                    done: false,
                    chunk_type: AiChunkType::Answer,
                    metadata: None,
                    children_id: None,
                });
                fut
            }),
            Arc::new(move |delta| {
                let fut = on_chunk_cb2(AiStreamChunk {
                    content: delta.to_string(),
                    done: false,
                    chunk_type: AiChunkType::Thinking,
                    metadata: None,
                    children_id: None,
                });
                fut
            }),
            Arc::new(move |tc: &StreamedToolCall| {
                let tx = tx_arc2.clone();
                let tc_owned = tc.clone();
                Box::pin(async move {
                    let _ = tx.send(tc_owned);
                }) as Pin<Box<dyn std::future::Future<Output = ()> + Send>>
            }),
        )
        .await?;
        total_input_tokens += response.input_tokens;
        total_output_tokens += response.output_tokens;
        all_chunks.extend(response.chunks.clone());
        let has_tool_calls = tools_enabled && !response.tool_calls.is_empty();
        if !has_tool_calls {
            return handle_final_answer(
                response,
                all_chunks,
                &request,
                session_id,
                version_id,
                total_input_tokens,
                total_output_tokens,
                session_start,
            )
            .await;
        }
        full_content.push_str(&response.content);
        let tool_calls: Vec<ToolCall> = response
            .tool_calls
            .iter()
            .map(|tc| ToolCall {
                id: tc.id.clone(),
                type_: "function".into(),
                function: crate::client::types::ToolCallFunction {
                    name: tc.name.clone(),
                    arguments: tc.arguments.clone(),
                },
            })
            .collect();
        messages.push(ChatRequestMessage::assistant(
            Some(response.content.clone()),
            Some(tool_calls.clone()),
        ));
        drain_tool_call_notifications(&mut rx, &on_chunk, &mut all_chunks).await;
        let calls: Vec<AgentToolCall> = response
            .tool_calls
            .iter()
            .map(|tc| AgentToolCall {
                id: tc.id.clone(),
                name: tc.name.clone(),
                arguments: tc.arguments.clone(),
            })
            .collect();
        let tool_messages = execute_streaming_tools(
            &request,
            &calls,
            session_id,
            &on_chunk,
            &mut all_chunks,
            tool_registry,
            message_builder,
        )
        .await;
        messages.extend(tool_messages);
        inject_passive_skills_stream(
            &request,
            message_builder,
            &response.tool_calls,
            &mut messages,
        )
        .await;
        tool_depth += 1;
        if tool_depth >= max_tool_depth {
            let max_depth_text = format!(
                "[AI reached maximum tool depth ({}) — no final answer produced]",
                max_tool_depth
            );
            on_chunk(AiStreamChunk {
                content: max_depth_text.clone(),
                done: true,
                chunk_type: AiChunkType::Answer,
                metadata: None,
                children_id: None,
            })
            .await;
            all_chunks.push(StreamChunk {
                chunk_type: StreamChunkType::Answer,
                content: max_depth_text,
            });
            record_ai_session(
                &request.cache,
                &request.db,
                request.project.id,
                request.sender.uid,
                session_id,
                request.room.id,
                request.model.id,
                version_id.unwrap_or_default(),
                total_input_tokens,
                total_output_tokens,
                session_start.elapsed().as_millis() as i64,
            )
            .await;
            return Ok(StreamResult {
                content: full_content,
                reasoning_content: String::new(),
                input_tokens: 0,
                output_tokens: 0,
                chunks: all_chunks,
            });
        }
    }
 }
 /// Announce every tool call currently queued on `rx`.
 ///
 /// For each queued call a `ToolCall` chunk is sent through `on_chunk` and a
 /// matching `StreamChunk` is appended to `all_chunks`. The chunk metadata
 /// carries the tool name, the parsed arguments and the display string.
 /// Returns as soon as the queue is empty or the channel is disconnected.
 async fn drain_tool_call_notifications(
    rx: &mut tokio::sync::mpsc::UnboundedReceiver<StreamedToolCall>,
    on_chunk: &SharedCallback,
    all_chunks: &mut Vec<StreamChunk>,
 ) {
    // Longest argument prefix, in bytes, shown before "..." is appended.
    const MAX_ARGS_DISPLAY: usize = 100;
    // `try_recv` only fails with Empty or Disconnected; either one ends the drain.
    while let Ok(call) = rx.try_recv() {
        let shown_args = if call.arguments.len() > MAX_ARGS_DISPLAY {
            // Cut at the last char boundary at or below the limit so the slice
            // never splits a multi-byte character. Index 0 is always a boundary.
            let cut = (0..=MAX_ARGS_DISPLAY)
                .rev()
                .find(|&idx| call.arguments.is_char_boundary(idx))
                .unwrap_or(MAX_ARGS_DISPLAY);
            format!("{}...", &call.arguments[..cut])
        } else {
            call.arguments.clone()
        };
        let display = format!("🔧 {}({})", call.name, shown_args);
        // Arguments that are not valid JSON are reported as an empty object.
        let parsed_args: serde_json::Value =
            serde_json::from_str(&call.arguments).unwrap_or_else(|_| serde_json::json!({}));
        let metadata = serde_json::json!({
            "tool": call.name,
            "args": parsed_args,
            "display": display.clone(),
        });
        let chunk = AiStreamChunk {
            content: display.clone(),
            done: false,
            chunk_type: AiChunkType::ToolCall,
            metadata: Some(metadata),
            children_id: None,
        };
        on_chunk(chunk).await;
        all_chunks.push(StreamChunk {
            chunk_type: StreamChunkType::ToolCall,
            content: display,
        });
    }
 }
 /// Executes the given tool calls concurrently and reports progress on the stream.
 ///
 /// A `ToolContext` is built from the request (database, cache, config, room,
 /// sender, project), optionally extended with the message builder's embed
 /// service and the supplied tool registry. Each call is then spawned as its own
 /// task on a `JoinSet`, running `ToolExecutor::execute_batch` with a cloned
 /// context.
 ///
 /// While tasks are running, each finished task is:
 /// * recorded through a `ToolCallRecorder` bound to `session_id`,
 /// * announced via `on_chunk` as a `ToolResult` chunk,
 /// * appended to `all_chunks`,
 /// * converted into tool messages for the next model turn.
 ///
 /// If no task finishes within ten seconds, an empty `ToolCall` chunk is sent
 /// as a heartbeat.
 ///
 /// Returns the tool messages in task-completion order (not call order).
 ///
 /// NOTE(review): when a task cannot be joined (`join_next` yields `Err`), the
 /// result is ignored, so no record, chunk, or tool message is produced for that
 /// call — confirm that callers tolerate a tool call without a matching reply.
 async fn execute_streaming_tools(
    request: &AiRequest,
    calls: &[AgentToolCall],
    session_id: Uuid,
    on_chunk: &SharedCallback,
    all_chunks: &mut Vec<StreamChunk>,
    tool_registry: &Option<crate::tool::registry::ToolRegistry>,
    message_builder: &MessageBuilder,
 ) -> Vec<ChatRequestMessage> {
    let mut tool_messages = Vec::new();
    // Shared execution context; every spawned task works on its own clone.
    let mut ctx = ToolContext::new(
        request.db.clone(),
        request.cache.clone(),
        request.config.clone(),
        request.room.id,
        Some(request.sender.uid),
    )
    .with_project(request.project.id);
    if let Some(es) = &message_builder.embed_service {
        ctx = ctx.with_embed_service(es.clone());
    }
    if let Some(registry) = tool_registry {
        ctx.registry_mut().merge(registry.clone());
    }
    let recorder =
        crate::tool::recorder::ToolCallRecorder::with_session(request.db.clone(), session_id);
    // One task per tool call so that slow tools do not block fast ones.
    let mut join_set = tokio::task::JoinSet::new();
    for call in calls {
        let call_clone = call.clone();
        let mut ctx_clone = ctx.clone();
        let sender_uid = request.sender.uid;
        let recorder_clone = recorder.clone();
        join_set.spawn(async move {
            let start = std::time::Instant::now();
            let executor = ToolExecutor::new();
            let res = executor
                .execute_batch(vec![call_clone.clone()], &mut ctx_clone)
                .await;
            (call_clone, res, start.elapsed(), sender_uid, recorder_clone)
        });
    }
    // The sleep future is recreated on every loop iteration, so the heartbeat
    // fires only after ten seconds without any task finishing.
    let heartbeat_dur = std::time::Duration::from_secs(10);
    while !join_set.is_empty() {
        tokio::select! {
            Some(res) = join_set.join_next() => {
                // A join error is silently skipped here (see the NOTE(review) above).
                if let Ok((call, results, elapsed, sender_uid, recorder)) = res {
                    match results {
                        Ok(results) => {
                            for result in &results {
                                let text = match &result.result { crate::tool::ToolResult::Ok(v) => v.to_string(), crate::tool::ToolResult::Error(msg) => msg.clone() };
                                // Log preview is cut at the last char boundary at or before byte 300.
                                let preview = if text.len() > 300 {
                                    let end = text.char_indices().map(|(i, _)| i).take_while(|&i| i <= 300).last().unwrap_or(300);
                                    format!("{}...", &text[..end])
                                } else { text.clone() };
                                tracing::debug!("tool_result: {} — {}", call.name, preview);
                                let is_error = matches!(result.result, crate::tool::ToolResult::Error(_));
                                let error_msg = match &result.result { crate::tool::ToolResult::Error(msg) => Some(msg.clone()), _ => None };
                                recorder.record(crate::tool::recorder::ToolCallRecord {
                                    tool_call_id: call.id.clone(),
                                    session_id: recorder.session_id(),
                                    tool_name: call.name.clone(),
                                    caller: sender_uid,
                                    arguments: call.arguments_json().unwrap_or_default(),
                                    status: if is_error { models::ai::ToolCallStatus::Failed } else { models::ai::ToolCallStatus::Success },
                                    execution_time_ms: Some(elapsed.as_millis() as i64),
                                    error_message: error_msg,
                                    error_stack: None,
                                    retry_count: 0
                                });
                            }
                            let success_display = format!("✅ {}", call.name);
                            let result_preview: Vec<String> = results.iter().map(|r| {
                                match &r.result { crate::tool::ToolResult::Ok(v) => v.to_string(), crate::tool::ToolResult::Error(msg) => msg.clone() }
                            }).collect();
                            // The metadata result is limited to the first 500 characters.
                            let metadata = serde_json::json!({
                                "tool": call.name,
                                "status": "ok",
                                "result": result_preview.join("\n").chars().take(500).collect::<String>(),
                                "display": success_display.clone(),
                            });
                            on_chunk(AiStreamChunk {
                                content: success_display.clone(),
                                done: false,
                                chunk_type: AiChunkType::ToolResult,
                                metadata: Some(metadata),
                                children_id: Some(call.id.clone()),
                            }).await;
                            // NOTE(review): the result is stored with the `ToolCall` chunk type — confirm this is intended.
                            all_chunks.push(StreamChunk { chunk_type: StreamChunkType::ToolCall, content: success_display });
                            let msgs = ToolExecutor::to_tool_messages(&results);
                            tool_messages.extend(msgs);
                        }
                        Err(e) => {
                            // The executor itself failed: record the failure and hand the
                            // error text back to the model as the tool's reply.
                            recorder.record(crate::tool::recorder::ToolCallRecord {
                                tool_call_id: call.id.clone(),
                                session_id: recorder.session_id(),
                                tool_name: call.name.clone(),
                                caller: sender_uid,
                                arguments: call.arguments_json().unwrap_or_default(),
                                status: models::ai::ToolCallStatus::Failed,
                                execution_time_ms: Some(elapsed.as_millis() as i64),
                                error_message: Some(e.to_string()),
                                error_stack: None,
                                retry_count: 0
                            });
                            let err_text = format!("[Tool call failed: {}]", e);
                            tracing::warn!(tool = %call.name, args = %call.arguments, error = %e, "tool_call_failed");
                            let err_display = format!("❌ {} (failed)", call.name);
                            let metadata = serde_json::json!({
                                "tool": call.name,
                                "status": "error",
                                "result": e.to_string(),
                                "display": err_display.clone(),
                            });
                            // NOTE(review): unlike the success chunk, this one carries no
                            // `children_id` — confirm whether it should reference `call.id`.
                            on_chunk(AiStreamChunk {
                                content: err_display.clone(),
                                done: false,
                                chunk_type: AiChunkType::ToolResult,
                                metadata: Some(metadata),
                                children_id: None,
                            }).await;
                            all_chunks.push(StreamChunk { chunk_type: StreamChunkType::ToolCall, content: err_display });
                            tool_messages.push(ChatRequestMessage::tool(&call.id, &err_text));
                        }
                    }
                }
            },
            _ = tokio::time::sleep(heartbeat_dur) => {
                // Heartbeat: an empty chunk sent while tools are still running.
                on_chunk(AiStreamChunk { content: String::new(), done: false, chunk_type: AiChunkType::ToolCall, metadata: None, children_id: None }).await;
            }
        }
    }
    tool_messages
 }
 /// Finishes a streaming session once the model produced its final answer.
 ///
 /// Records the session (token totals and elapsed milliseconds since
 /// `session_start`) via `record_ai_session`, then packages the response text,
 /// the reasoning text and the collected chunks into a `StreamResult`.
 async fn handle_final_answer(
    response: crate::client::StreamResponse,
    all_chunks: Vec<StreamChunk>,
    request: &AiRequest,
    session_id: Uuid,
    version_id: Option<Uuid>,
    total_input_tokens: i64,
    total_output_tokens: i64,
    session_start: std::time::Instant,
 ) -> Result<StreamResult> {
    // The full answer is deliberately NOT appended to `all_chunks`: per the
    // original author, the incremental deltas already collected add up to the
    // same text, and pushing it again would make merge_consecutive_blocks
    // persist the content twice.
    let model_version = version_id.unwrap_or_default();
    let elapsed_ms = session_start.elapsed().as_millis() as i64;
    record_ai_session(
        &request.cache,
        &request.db,
        request.project.id,
        request.sender.uid,
        session_id,
        request.room.id,
        request.model.id,
        model_version,
        total_input_tokens,
        total_output_tokens,
        elapsed_ms,
    )
    .await;
    // `response` is owned, so its text fields are moved out directly.
    let result = StreamResult {
        content: response.content,
        reasoning_content: response.reasoning_content,
        input_tokens: total_input_tokens,
        output_tokens: total_output_tokens,
        chunks: all_chunks,
    };
    Ok(result)
 }
 /// Appends skill context messages triggered by the given streamed tool calls.
 ///
 /// Loads the project's enabled skills, adds every built-in skill whose slug is
 /// not already present, and runs the passive detector of the message builder's
 /// perception service once per tool call. Each distinct detected context is
 /// pushed onto `messages` as a system message.
 ///
 /// A failed skill query makes this a no-op (best effort).
 async fn inject_passive_skills_stream(
    request: &AiRequest,
    message_builder: &MessageBuilder,
    tool_calls: &[StreamedToolCall],
    messages: &mut Vec<ChatRequestMessage>,
 ) {
    let Ok(skills) = project_skill::Entity::find()
        .filter(project_skill::Column::ProjectUuid.eq(request.project.id))
        .filter(project_skill::Column::Enabled.eq(true))
        .all(&request.db)
        .await
    else {
        return;
    };

    // Project-defined skills come first so they win over built-ins with the same slug.
    let mut skill_entries: Vec<SkillEntry> = Vec::with_capacity(skills.len());
    for s in skills {
        skill_entries.push(SkillEntry {
            slug: s.slug,
            name: s.name,
            description: s.description,
            content: s.content,
        });
    }
    for built_in in crate::skills::all_skills() {
        let already_present = skill_entries.iter().any(|s| s.slug == built_in.slug);
        if already_present {
            continue;
        }
        skill_entries.push(SkillEntry {
            slug: built_in.slug.to_string(),
            name: built_in.name.to_string(),
            description: Some(built_in.description.to_string()),
            content: built_in.content.clone(),
        });
    }

    // Run passive detection per tool call, keeping each skill context once.
    let mut contexts = Vec::new();
    for tc in tool_calls {
        let event = ToolCallEvent {
            tool_name: tc.name.clone(),
            arguments: tc.arguments.clone(),
        };
        let detected = message_builder
            .perception_service
            .passive
            .detect(&event, &skill_entries);
        if let Some(ctx) = detected {
            MessageBuilder::push_unique_skill_context(&mut contexts, ctx);
        }
    }
    messages.extend(contexts.into_iter().map(|ctx| ctx.to_system_message()));
 }
--- a/libs/agent/client/mod.rs
+++ b/libs/agent/client/mod.rs
@ -1,831 +0,0 @@
 //! Unified AI client with built-in retry, token tracking, and session recording.
 //!
 //! Uses rig-core as the underlying AI provider library.
 pub mod types;
 pub use types::{ChatRequestMessage, ToolCall as ClientToolCall};
 use std::pin::Pin;
 use std::sync::Arc;
 use std::time::Instant;
 use uuid::Uuid;
 use crate::error::{AgentError, Result};
 use futures::StreamExt;
 use rig::completion::message::{AssistantContent, Message as RigMessage};
 use rig::completion::{CompletionModel, GetTokenUsage, ToolDefinition};
 use rig::one_or_many::OneOrMany;
 use rig::prelude::CompletionClient;
 use rig::providers::openai;
 /// Stateless handle that reports AI call outcomes to the `metrics` crate counters.
 #[derive(Debug, Clone, Default)]
 pub struct AiMetrics;
 impl AiMetrics {
    /// Creates the (zero-sized) metrics handle.
    pub fn new() -> Self {
        AiMetrics
    }
    /// Counts one successful call.
    ///
    /// Token counters are only bumped for strictly positive amounts, and the
    /// function-call counter only when `has_function_call` is set.
    pub fn record_success(&self, input_tokens: i64, output_tokens: i64, has_function_call: bool) {
        metrics::counter!("ai_calls_total").increment(1);
        metrics::counter!("ai_calls_success").increment(1);
        // `try_from` rejects negatives; the filter drops zero.
        let positive = |tokens: i64| u64::try_from(tokens).ok().filter(|&count| count > 0);
        if let Some(count) = positive(input_tokens) {
            metrics::counter!("ai_input_tokens_total").increment(count);
        }
        if let Some(count) = positive(output_tokens) {
            metrics::counter!("ai_output_tokens_total").increment(count);
        }
        if has_function_call {
            metrics::counter!("ai_function_calls_total").increment(1);
        }
    }
    /// Counts one failed call.
    pub fn record_failure(&self) {
        metrics::counter!("ai_calls_total").increment(1);
        metrics::counter!("ai_calls_failure").increment(1);
    }
 }
 /// Configuration for the AI client.
 ///
 /// Carries the credentials and the optional endpoint override used to build
 /// the underlying rig OpenAI client.
 #[derive(Clone)]
 pub struct AiClientConfig {
    /// API key passed to the rig client builder.
    pub api_key: String,
    /// Endpoint override; `None` selects the public OpenAI endpoint.
    pub base_url: Option<String>,
 }
 impl AiClientConfig {
    /// Creates a config for the default endpoint with the given API key.
    pub fn new(api_key: String) -> Self {
        Self {
            api_key,
            base_url: None,
        }
    }
    /// Returns the config with `base_url` replaced by the given endpoint.
    pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
        self.base_url = Some(base_url.into());
        self
    }
    /// Build a rig OpenAI client from this config.
    ///
    /// # Panics
    ///
    /// Panics if the rig client builder returns an error.
    pub fn build_rig_client(&self) -> openai::Client {
        // The fallback must include the `/v1` API prefix: configured base URLs
        // in this project (e.g. `https://api.openai.com/v1`) carry it, and the
        // OpenAI endpoints live under that prefix. The previous fallback
        // (`https://api.openai.com`) omitted it.
        const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
        let base = self.base_url.as_deref().unwrap_or(DEFAULT_BASE_URL);
        openai::Client::builder()
            .api_key(&self.api_key)
            .base_url(base)
            .build()
            .expect("Failed to build rig OpenAI client")
    }
 }
 /// Outcome of a single (non-streaming) AI call together with its usage figures.
 #[derive(Debug, Clone)]
 pub struct AiCallResponse {
    /// Concatenated text returned by the model.
    pub content: String,
    /// Prompt-side token count reported for the call.
    pub input_tokens: i64,
    /// Completion-side token count reported for the call.
    pub output_tokens: i64,
    /// Wall-clock duration of the call in milliseconds.
    pub latency_ms: i64,
    /// Tool calls requested by the model.
    pub tool_calls: Vec<ClientToolCall>,
    /// Names of the tools referenced by `tool_calls`.
    pub tool_calls_finished: Vec<String>,
 }
 impl AiCallResponse {
    /// Sum of input and output tokens.
    pub fn total_tokens(&self) -> i64 {
        let Self {
            input_tokens,
            output_tokens,
            ..
        } = self;
        *input_tokens + *output_tokens
    }
 }
 /// Bookkeeping for one retry loop: attempts made so far and the backoff limits.
 #[derive(Debug)]
 struct RetryState {
    /// Number of retries already performed.
    attempt: u32,
    /// Maximum number of retries allowed.
    max_retries: u32,
    /// Upper bound for a single backoff pause, in milliseconds.
    max_backoff_ms: u64,
 }
 impl RetryState {
    /// Starts with zero attempts and a 5000 ms backoff cap.
    fn new(max_retries: u32) -> Self {
        Self {
            attempt: 0,
            max_retries,
            max_backoff_ms: 5000,
        }
    }
    /// `true` while the retry budget is not exhausted.
    fn should_retry(&self) -> bool {
        self.max_retries > self.attempt
    }
    /// Pause before the next retry: 500 ms doubled per attempt (exponent capped
    /// at 5), plus jitter, never exceeding `max_backoff_ms`.
    ///
    /// NOTE(review): the jitter is a draw from `0..=base` with `base / 2`
    /// subtracted using saturating arithmetic, so roughly half of the draws
    /// collapse to zero jitter, and once `base` reaches the cap the result is
    /// always exactly the cap — confirm whether symmetric jitter was intended.
    fn backoff_duration(&self) -> std::time::Duration {
        let cap = self.max_backoff_ms;
        let doublings = self.attempt.min(5);
        let base_ms = 500u64.saturating_mul(1u64 << doublings).min(cap);
        let half = base_ms / 2;
        // Draw from the inclusive range 0..=base_ms.
        let draw = fastrand_u64(base_ms + 1);
        let jitter = draw.saturating_sub(half);
        let pause_ms = base_ms.saturating_add(jitter).min(cap);
        std::time::Duration::from_millis(pause_ms)
    }
    /// Registers that one more retry has been made.
    fn next(&mut self) {
        self.attempt += 1;
    }
 }
 /// Returns a pseudo-random value in `0..n` from a process-wide generator.
 ///
 /// The generator is a 64-bit linear congruential sequence held in an atomic;
 /// every call with `n > 1` advances it by one step and reduces the new state
 /// modulo `n`. For `n <= 1` the result is `0` and the state is left untouched.
 fn fastrand_u64(n: u64) -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};
    static STATE: AtomicU64 = AtomicU64::new(0x193_667_6a_5e_7c_57);
    if n <= 1 {
        return 0;
    }
    let advance = |state: u64| state.wrapping_mul(6364136223846793005).wrapping_add(1);
    // `fetch_update` retries the compare-exchange until this thread's step is
    // installed and hands back the state it replaced.
    let previous = match STATE.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |state| {
        Some(advance(state))
    }) {
        Ok(replaced) | Err(replaced) => replaced,
    };
    advance(previous) % n
 }
 /// Decides whether an error looks transient and is worth another attempt.
 ///
 /// The check is a case-sensitive substring search of the error's display text
 /// against a fixed list of network, rate-limit and server-side failure markers.
 ///
 /// NOTE(review): numeric markers such as "500" match anywhere in the text, so
 /// unrelated messages containing those digits are also treated as retryable.
 fn is_retryable_error(err: &AgentError) -> bool {
    const RETRYABLE_MARKERS: &[&str] = &[
        "connection refused",
        "connection timed out",
        "network error",
        "dns error",
        "error sending request",
        "Http client error",
        "rate_limit",
        "rate limit",
        "429",
        "500",
        "502",
        "503",
        "504",
        "internal_server_error",
        "service_unavailable",
        "gateway_timeout",
        "bad_gateway",
    ];
    let text = err.to_string();
    RETRYABLE_MARKERS
        .iter()
        .any(|marker| text.contains(*marker))
 }
 /// Lazily initialised, process-wide metrics handle.
 static AI_METRICS: std::sync::OnceLock<AiMetrics> = std::sync::OnceLock::new();
 /// Returns the shared [`AiMetrics`] handle, creating it on first use.
 fn ai_metrics() -> &'static AiMetrics {
    let shared = &AI_METRICS;
    shared.get_or_init(AiMetrics::default)
 }
 // ── Type conversions ─────────────────────────────────────────────────────────
 pub(crate) fn to_rig_message(msg: &ChatRequestMessage) -> RigMessage {
    match msg.role.as_str() {
        "system" => {
            // System messages are handled via preamble(), not passed as messages.
            // We still need to return a valid RigMessage variant.
            RigMessage::user(msg.content.as_deref().unwrap_or(""))
        }
        "user" => RigMessage::user(msg.content.as_deref().unwrap_or("")),
        "assistant" => {
            let mut parts: Vec<AssistantContent> = Vec::new();
            if let Some(ref content) = msg.content {
                if !content.is_empty() {
                    parts.push(AssistantContent::text(content));
                }
            }
            if let Some(ref tool_calls) = msg.tool_calls {
                for tc in tool_calls {
                    // GLM may return empty tool call IDs — fall back to a generated UUID.
                    let id = if tc.id.is_empty() {
                        Uuid::new_v4().to_string()
                    } else {
                        tc.id.clone()
                    };
                    parts.push(AssistantContent::tool_call_with_call_id(
                        &id,
                        id.clone(),
                        &tc.function.name,
                        serde_json::from_str(&tc.function.arguments)
                            .unwrap_or(serde_json::Value::Null),
                    ));
                }
            }
            if parts.is_empty() {
                RigMessage::assistant("")
            } else if parts.len() == 1 {
                // Single part — use simpler constructors
                match parts.pop().unwrap() {
                    AssistantContent::Text(t) => RigMessage::assistant(t.text),
                    ac => RigMessage::Assistant {
                        id: None,
                        content: OneOrMany::one(ac),
                    },
                }
            } else {
                let content = OneOrMany::many(parts).expect("non-empty parts");
                RigMessage::Assistant { id: None, content }
            }
        }
        "tool" | "function" => {
            let id = msg.tool_call_id.as_deref().unwrap_or("unknown").to_string();
            let call_id = msg.tool_call_id.clone().or_else(|| Some(id.clone()));
            let content = msg.content.as_deref().unwrap_or("");
            RigMessage::tool_result_with_call_id(id, call_id, content)
        }
        "developer" => {
            // Developer role maps to user/system in rig
            RigMessage::user(msg.content.as_deref().unwrap_or(""))
        }
        _ => RigMessage::user(msg.content.as_deref().unwrap_or("")),
    }
 }
 /// Reads a rig `ToolDefinition` out of a JSON tool description.
 ///
 /// Only the `function` object is inspected. Returns `None` when
 /// `function.name` is missing or not a string. A missing description becomes
 /// an empty string and missing parameters become an empty JSON object.
 fn to_rig_tool_def(tool_json: &serde_json::Value) -> Option<ToolDefinition> {
    let function = tool_json.get("function")?;
    let name = function.get("name")?.as_str()?.to_string();
    let description = function
        .get("description")
        .and_then(|d| d.as_str())
        .unwrap_or_default()
        .to_string();
    let parameters = match function.get("parameters") {
        Some(schema) => schema.clone(),
        None => serde_json::json!({}),
    };
    Some(ToolDefinition {
        name,
        description,
        parameters,
    })
 }
 // ── Call helpers ─────────────────────────────────────────────────────────────
 /// Sends one non-streaming completion request and unpacks the response.
 ///
 /// # Parameters
 /// * `messages` — conversation to send. System messages form the preamble;
 ///   all other messages are converted with `to_rig_message` and sent as history.
 /// * `temperature`, `max_tokens` — applied only when `Some`.
 /// * `tools` — JSON tool descriptions; entries without a usable
 ///   `function.name` are skipped.
 /// * `tool_choice` — `"none"`, `"auto"`, or the name of a specific function;
 ///   left unset when `None`.
 ///
 /// # Returns
 /// `(content, input_tokens, output_tokens, tool_calls, tool_names)`, where
 /// `content` is the concatenation of all text parts and `tool_names` lists the
 /// function name of every returned tool call.
 ///
 /// # Errors
 /// Any failure reported by the model is returned as `AgentError::OpenAi`
 /// carrying the error's display text.
 async fn do_completion<M>(
    model: &M,
    messages: &[ChatRequestMessage],
    temperature: Option<f64>,
    max_tokens: Option<u32>,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
 ) -> Result<(String, u64, u64, Vec<ClientToolCall>, Vec<String>)>
 where
    M: CompletionModel<Client = openai::Client>,
 {
    // All system messages are removed from the history below, so every one of
    // them has to be carried by the preamble. Previously only the first system
    // message was used and any later one was silently dropped.
    let preamble = messages
        .iter()
        .filter(|m| m.role == "system")
        .filter_map(|m| m.content.as_deref())
        .filter(|text| !text.is_empty())
        .collect::<Vec<_>>()
        .join("\n\n");
    let non_system: Vec<RigMessage> = messages
        .iter()
        .filter(|m| m.role != "system")
        .map(to_rig_message)
        .collect();
    let tool_defs: Vec<ToolDefinition> = tools
        .map(|ts| ts.iter().filter_map(to_rig_tool_def).collect())
        .unwrap_or_default();
    let mut builder = model.completion_request("");
    if !preamble.is_empty() {
        builder = builder.preamble(preamble);
    }
    if !non_system.is_empty() {
        builder = builder.messages(non_system);
    }
    if let Some(t) = temperature {
        builder = builder.temperature(t);
    }
    if let Some(mt) = max_tokens {
        builder = builder.max_tokens(u64::from(mt));
    }
    if !tool_defs.is_empty() {
        builder = builder.tools(tool_defs);
    }
    // Only set tool_choice when explicitly provided (mirrors call_stream_once logic)
    if let Some(tc) = tool_choice {
        let choice = match tc {
            "none" => rig::completion::message::ToolChoice::None,
            "auto" => rig::completion::message::ToolChoice::Auto,
            // Any other value is treated as the name of the function to force.
            s => rig::completion::message::ToolChoice::Specific {
                function_names: vec![s.to_string()],
            },
        };
        builder = builder.tool_choice(choice);
    }
    let response = builder
        .send()
        .await
        .map_err(|e| AgentError::OpenAi(e.to_string()))?;
    let mut content = String::new();
    let mut tool_names: Vec<String> = Vec::new();
    let mut tool_calls: Vec<ClientToolCall> = Vec::new();
    for item in response.choice {
        match item {
            AssistantContent::Text(t) => {
                content.push_str(&t.text);
            }
            AssistantContent::ToolCall(tc) => {
                tool_names.push(tc.function.name.clone());
                tool_calls.push(ClientToolCall {
                    id: tc.id,
                    type_: "function".into(),
                    function: types::ToolCallFunction {
                        name: tc.function.name,
                        // Arguments are re-serialised to a JSON string for the caller.
                        arguments: serde_json::to_string(&tc.function.arguments)
                            .unwrap_or_else(|_| "{}".to_string()),
                    },
                });
            }
            // Reasoning and image parts are not surfaced by this helper.
            AssistantContent::Reasoning(_) => {}
            AssistantContent::Image(_) => {}
        }
    }
    let input_tokens = response.usage.input_tokens;
    let output_tokens = response.usage.output_tokens;
    Ok((content, input_tokens, output_tokens, tool_calls, tool_names))
 }
 // ── Public API ───────────────────────────────────────────────────────────────
 /// Runs a completion with default parameters, retrying transient failures.
 ///
 /// No temperature, token limit, tools or tool choice are set. Errors accepted
 /// by `is_retryable_error` are retried up to `max_retries` times (3 when
 /// `None`), sleeping for the backoff duration between attempts. Success and
 /// final failure are both reported to the shared metrics.
 pub async fn call_with_retry(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    max_retries: Option<u32>,
 ) -> Result<AiCallResponse> {
    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
    let mut state = RetryState::new(max_retries.unwrap_or(3));
    loop {
        let started = Instant::now();
        let err = match do_completion(&model, messages, None, None, None, None).await {
            Ok((content, input_tokens, output_tokens, tool_calls, tool_names)) => {
                // Latency covers the successful attempt only.
                let latency_ms = started.elapsed().as_millis() as i64;
                let input_tokens = input_tokens as i64;
                let output_tokens = output_tokens as i64;
                ai_metrics().record_success(input_tokens, output_tokens, !tool_names.is_empty());
                return Ok(AiCallResponse {
                    content,
                    input_tokens,
                    output_tokens,
                    latency_ms,
                    tool_calls,
                    tool_calls_finished: tool_names,
                });
            }
            Err(err) => err,
        };
        // Give up when the budget is spent or the error is not transient.
        if !(state.should_retry() && is_retryable_error(&err)) {
            ai_metrics().record_failure();
            return Err(err);
        }
        let duration = state.backoff_duration();
        tracing::warn!(
            attempt = state.attempt + 1,
            max_retries = state.max_retries,
            backoff_ms = duration.as_millis() as u64,
            model = %model_name,
            error = %err,
            "ai_call_retry"
        );
        tokio::time::sleep(duration).await;
        state.next();
    }
 }
 /// Runs a completion with explicit sampling parameters, retrying transient failures.
 ///
 /// `temperature` and `max_tokens` are always applied; `tools` and
 /// `tool_choice` are forwarded as given. Errors accepted by
 /// `is_retryable_error` are retried up to `max_retries` times (3 when `None`)
 /// with a backoff pause in between. Success and final failure are both
 /// reported to the shared metrics.
 pub async fn call_with_params(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    temperature: f32,
    max_tokens: u32,
    max_retries: Option<u32>,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
 ) -> Result<AiCallResponse> {
    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
    let mut state = RetryState::new(max_retries.unwrap_or(3));
    loop {
        let started = Instant::now();
        let attempt = do_completion(
            &model,
            messages,
            Some(temperature as f64),
            Some(max_tokens),
            tools,
            tool_choice,
        )
        .await;
        let err = match attempt {
            Ok((content, input_tokens, output_tokens, tool_calls, tool_names)) => {
                // Latency covers the successful attempt only.
                let latency_ms = started.elapsed().as_millis() as i64;
                let input_tokens = input_tokens as i64;
                let output_tokens = output_tokens as i64;
                ai_metrics().record_success(input_tokens, output_tokens, !tool_names.is_empty());
                return Ok(AiCallResponse {
                    content,
                    input_tokens,
                    output_tokens,
                    latency_ms,
                    tool_calls,
                    tool_calls_finished: tool_names,
                });
            }
            Err(err) => err,
        };
        // Give up when the budget is spent or the error is not transient.
        if !(state.should_retry() && is_retryable_error(&err)) {
            ai_metrics().record_failure();
            return Err(err);
        }
        let duration = state.backoff_duration();
        tracing::warn!(
            attempt = state.attempt + 1,
            max_retries = state.max_retries,
            backoff_ms = duration.as_millis() as u64,
            model = %model_name,
            error = %err,
            "ai_call_retry"
        );
        tokio::time::sleep(duration).await;
        state.next();
    }
 }
 /// A tool call extracted from streaming response with accumulated arguments.
 ///
 /// All three fields are plain strings; `arguments` holds JSON text that has
 /// not been parsed.
 #[derive(Debug, Clone)]
 pub struct StreamedToolCall {
    /// Tool call ID
    pub id: String,
    /// Tool function name
    pub name: String,
    /// Accumulated JSON arguments string
    pub arguments: String,
 }
 /// Type of chunk in the streaming response, preserving arrival order.
 ///
 /// Used as the tag of a [`StreamChunk`].
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum StreamChunkType {
    /// Reasoning ("thinking") output — presumably the model's reasoning deltas; confirm against the producer.
    Thinking,
    /// Answer text.
    Answer,
    /// A tool invocation.
    ToolCall,
    /// The outcome of a tool invocation.
    ToolResult,
 }
 /// A single chunk from the streaming response in arrival order.
 #[derive(Debug, Clone)]
 pub struct StreamChunk {
    /// What kind of content this chunk carries.
    pub chunk_type: StreamChunkType,
    /// The chunk's text.
    pub content: String,
 }
 /// Streaming result from rig.
 ///
 /// Aggregates everything collected over one streaming completion: the answer
 /// text, token usage, reasoning text, tool calls and the ordered chunk list.
 #[derive(Debug)]
 pub struct StreamResponse {
    /// Answer text.
    pub content: String,
    /// Input-side token count.
    pub input_tokens: i64,
    /// Output-side token count.
    pub output_tokens: i64,
    /// Accumulated reasoning/thinking text from the model.
    pub reasoning_content: String,
    /// Full tool calls with accumulated arguments (not just names)
    pub tool_calls: Vec<StreamedToolCall>,
    /// All chunks in arrival order — preserves think/answer/tool interleaving.
    pub chunks: Vec<StreamChunk>,
 }
 /// Async callback: takes a string delta and broadcasts it to the WebSocket.
 /// The returned Future must be awaited by the caller.
 ///
 /// Shared behind an `Arc` so it can be cloned for each attempt.
 pub type StreamTextCb =
    Arc<dyn Fn(&str) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
 /// Async callback with the same shape as [`StreamTextCb`]; passed to
 /// `call_stream` as `on_reasoning_delta`.
 pub type StreamReasoningCb =
    Arc<dyn Fn(&str) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
 /// Async callback receiving a borrowed [`StreamedToolCall`]; passed to
 /// `call_stream` as `on_tool_call`. The returned future must be awaited.
 pub type StreamToolCallCb = Arc<
    dyn Fn(&StreamedToolCall) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>>
        + Send
        + Sync,
 >;
 /// Runs a streaming chat completion, retrying transient failures up to 5 times.
 ///
 /// Each attempt is delegated to `call_stream_once` (documented as applying a
 /// 60s timeout per attempt) and receives clones of the three callbacks.
 /// Errors accepted by `is_retryable_error` trigger a backoff pause and another
 /// attempt; any other error, or an exhausted retry budget, is counted as a
 /// failure in the shared metrics and returned.
 ///
 /// NOTE(review): the same callbacks are handed to every attempt, so deltas
 /// emitted by an attempt that later fails may already have reached the
 /// consumer before the retry starts — confirm that this is acceptable.
 pub async fn call_stream(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    temperature: f32,
    max_tokens: u32,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
    on_text_delta: StreamTextCb,
    on_reasoning_delta: StreamReasoningCb,
    on_tool_call: StreamToolCallCb,
 ) -> Result<StreamResponse> {
    let mut state = RetryState::new(5);
    loop {
        let attempt = call_stream_once(
            messages,
            model_name,
            config,
            temperature,
            max_tokens,
            tools,
            tool_choice,
            on_text_delta.clone(),
            on_reasoning_delta.clone(),
            on_tool_call.clone(),
        )
        .await;
        let err = match attempt {
            Ok(response) => return Ok(response),
            Err(err) => err,
        };
        // Give up when the budget is spent or the error is not transient.
        if !(state.should_retry() && is_retryable_error(&err)) {
            ai_metrics().record_failure();
            return Err(err);
        }
        let duration = state.backoff_duration();
        tracing::warn!(
            attempt = state.attempt + 1,
            max_retries = 5,
            backoff_ms = duration.as_millis() as u64,
            model = %model_name,
            error = %err,
            "ai_stream_retry"
        );
        tokio::time::sleep(duration).await;
        state.next();
    }
 }
 /// Single attempt of streaming completion, bounded by a 120s timeout.
 ///
 /// The first `system` message (if any) supplies the preamble; every `system`
 /// message is excluded from the chat history handed to the request builder.
 /// Text, reasoning and completed tool-call events are forwarded to the
 /// callbacks as they arrive and are also accumulated into the returned
 /// `StreamResponse`.
 ///
 /// # Errors
 /// Returns `AgentError::OpenAi` when the stream cannot be started or yields an
 /// error item, and `AgentError::Timeout` (with `task_id: 0`) when the whole
 /// attempt, including stream start-up, exceeds 120 seconds.
 async fn call_stream_once(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    temperature: f32,
    max_tokens: u32,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
    on_text_delta: StreamTextCb,
    on_reasoning_delta: StreamReasoningCb,
    on_tool_call: StreamToolCallCb,
 ) -> Result<StreamResponse> {
    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
    // Only the FIRST system message is used as the preamble; a missing or
    // content-less one yields an empty preamble, which is then not set at all.
    let preamble = messages
        .iter()
        .find(|m| m.role == "system")
        .and_then(|m| m.content.as_deref())
        .unwrap_or("")
        .to_string();
    // All system messages (not just the first) are removed from the history.
    let non_system: Vec<RigMessage> = messages
        .iter()
        .filter(|m| m.role != "system")
        .map(to_rig_message)
        .collect();
    // Tool definitions that `to_rig_tool_def` cannot convert are silently skipped.
    let tool_defs: Vec<ToolDefinition> = tools
        .map(|ts| ts.iter().filter_map(to_rig_tool_def).collect())
        .unwrap_or_default();
    // The request is created with an empty prompt string; the conversation is
    // supplied through `messages` below.
    let mut builder = model
        .completion_request("")
        .temperature(temperature as f64)
        .max_tokens(max_tokens as u64);
    if !preamble.is_empty() {
        builder = builder.preamble(preamble);
    }
    if !non_system.is_empty() {
        builder = builder.messages(non_system);
    }
    if !tool_defs.is_empty() {
        builder = builder.tools(tool_defs);
    }
    // Any value other than "none"/"auto" is treated as the name of one specific function.
    if let Some(tc) = tool_choice {
        match tc {
            "none" => {
                builder = builder.tool_choice(rig::completion::message::ToolChoice::None);
            }
            "auto" => {
                builder = builder.tool_choice(rig::completion::message::ToolChoice::Auto);
            }
            s => {
                builder = builder.tool_choice(rig::completion::message::ToolChoice::Specific {
                    function_names: vec![s.to_string()],
                });
            }
        }
    }
    let stream_fut = async {
        let mut stream = builder
            .stream()
            .await
            .map_err(|e| AgentError::OpenAi(e.to_string()))?;
        let mut content = String::new();
        let mut reasoning_content = String::new();
        let mut tool_calls: Vec<StreamedToolCall> = Vec::new();
        let mut chunks: Vec<StreamChunk> = Vec::new();
        // Some models (e.g. GLM) ignore tool_choice="none" and still emit tool_calls.
        // Filter them out so they don't cause spurious tool execution attempts.
        let skip_tool_calls = tool_choice == Some("none");
        use std::collections::HashMap;
        // Tool calls announced via deltas but not yet completed, keyed by internal call id.
        let mut partial_tool_calls: HashMap<String, StreamedToolCall> = HashMap::new();
        let mut stream_finished = false;
        use rig::streaming::StreamedAssistantContent;
        while let Some(item) = stream.next().await {
            match item {
                Ok(StreamedAssistantContent::Text(text)) => {
                    content.push_str(&text.text);
                    on_text_delta(&text.text).await;
                    chunks.push(StreamChunk {
                        chunk_type: StreamChunkType::Answer,
                        content: text.text,
                    });
                }
                Ok(StreamedAssistantContent::ToolCall {
                    tool_call,
                    internal_call_id,
                }) => {
                    if skip_tool_calls {
                        partial_tool_calls.remove(&internal_call_id);
                        continue;
                    }
                    // Arguments are kept as a string: a JSON string value is used
                    // verbatim, anything else is re-serialized ("{}" on failure).
                    let arguments = match &tool_call.function.arguments {
                        serde_json::Value::String(s) => s.clone(),
                        other => serde_json::to_string(other).unwrap_or_else(|_| "{}".to_string()),
                    };
                    let tc = StreamedToolCall {
                        id: tool_call.id.clone(),
                        name: tool_call.function.name.clone(),
                        arguments,
                    };
                    on_tool_call(&tc).await;
                    chunks.push(StreamChunk {
                        chunk_type: StreamChunkType::ToolCall,
                        content: serde_json::json!({
                            "id": tc.id,
                            "name": tc.name,
                            "arguments": tc.arguments,
                        })
                        .to_string(),
                    });
                    tool_calls.push(tc);
                    // The completed call supersedes any partial entry built from deltas.
                    partial_tool_calls.remove(&internal_call_id);
                }
                Ok(StreamedAssistantContent::ToolCallDelta {
                    id,
                    internal_call_id,
                    content: delta_content,
                }) => {
                    if skip_tool_calls {
                        continue;
                    }
                    use rig::streaming::ToolCallDeltaContent;
                    match delta_content {
                        // A name delta (re)starts the partial entry with empty arguments.
                        ToolCallDeltaContent::Name(name) => {
                            partial_tool_calls.insert(
                                internal_call_id.clone(),
                                StreamedToolCall {
                                    id: id.clone(),
                                    name,
                                    arguments: String::new(),
                                },
                            );
                        }
                        // Argument deltas for an id with no prior name delta are dropped.
                        ToolCallDeltaContent::Delta(delta) => {
                            if let Some(tc) = partial_tool_calls.get_mut(&internal_call_id) {
                                tc.arguments.push_str(&delta);
                            }
                        }
                    }
                }
                Ok(StreamedAssistantContent::Reasoning(reasoning)) => {
                    // Only `Text` reasoning parts are forwarded; other part kinds are ignored.
                    for part in &reasoning.content {
                        if let rig::completion::message::ReasoningContent::Text { text, .. } = part
                        {
                            reasoning_content.push_str(text);
                            on_reasoning_delta(text).await;
                            chunks.push(StreamChunk {
                                chunk_type: StreamChunkType::Thinking,
                                content: text.clone(),
                            });
                        }
                    }
                }
                Ok(StreamedAssistantContent::ReasoningDelta { reasoning, .. }) => {
                    reasoning_content.push_str(&reasoning);
                    on_reasoning_delta(&reasoning).await;
                    chunks.push(StreamChunk {
                        chunk_type: StreamChunkType::Thinking,
                        content: reasoning.clone(),
                    });
                }
                Ok(StreamedAssistantContent::Final(response)) => {
                    stream_finished = true;
                    // NOTE(review): partial calls flushed here are appended in HashMap
                    // iteration order and do not trigger `on_tool_call` or a
                    // `ToolCall` chunk, unlike completed calls above.
                    if !skip_tool_calls {
                        for (_, tc) in partial_tool_calls.drain() {
                            tool_calls.push(tc);
                        }
                    } else {
                        partial_tool_calls.drain();
                    }
                    if let Some(usage) = response.token_usage() {
                        let in_toks = usage.input_tokens as i64;
                        let out_toks = usage.output_tokens as i64;
                        ai_metrics().record_success(in_toks, out_toks, !tool_calls.is_empty());
                        return Ok(StreamResponse {
                            content,
                            reasoning_content,
                            input_tokens: in_toks,
                            output_tokens: out_toks,
                            tool_calls,
                            chunks,
                        });
                    }
                    // Usage not available from Final — keep reading until the stream
                    // ends, then return through the zero-usage path below.
                }
                Err(e) => return Err(AgentError::OpenAi(e.to_string())),
            }
        }
        // Flush any remaining partial tool calls when the stream ended without a Final
        // item (a Final without usage has already flushed them above).
        if !stream_finished && !skip_tool_calls {
            for (_, tc) in partial_tool_calls.drain() {
                tool_calls.push(tc);
            }
        }
        // No usage information was reported: record and return zero token counts.
        ai_metrics().record_success(0, 0, !tool_calls.is_empty());
        Ok(StreamResponse {
            content,
            reasoning_content,
            input_tokens: 0,
            output_tokens: 0,
            tool_calls,
            chunks,
        })
    };
    // 120s timeout for the entire stream
    match tokio::time::timeout(std::time::Duration::from_secs(120), stream_fut).await {
        Ok(result) => result,
        Err(_) => Err(AgentError::Timeout {
            task_id: 0,
            seconds: 120,
        }),
    }
 }
--- a/libs/agent/client/types.rs
+++ b/libs/agent/client/types.rs
@ -1,240 +0,0 @@
 //! Internal message types for OpenAI-compatible chat completion API.
 //!
 //! Uses plain structs with `role: String` instead of an enum — easier to serialize,
 //! and the downstream code only constructs specific variants anyway.
 use serde::{Deserialize, Serialize};
 /// A message in a chat completion request.
 ///
 /// Every `Option` field is left out of the serialized JSON when it is `None`.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ChatRequestMessage {
    /// One of "system", "user", "assistant", "tool", "developer", "function"
    pub role: String,
    /// Text content of the message, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Optional `name` attached to the message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Required for "tool" role messages
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_call_id: Option<String>,
    /// Tool calls for "assistant" role messages
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
 }
 impl ChatRequestMessage {
    /// Shared constructor for messages that carry only a role and text content.
    fn text_only(role: &str, content: String) -> Self {
        Self {
            role: role.to_owned(),
            content: Some(content),
            name: None,
            tool_call_id: None,
            tool_calls: None,
        }
    }
    /// Builds a `"system"` message with the given text.
    pub fn system(content: impl Into<String>) -> Self {
        Self::text_only("system", content.into())
    }
    /// Builds a `"user"` message with the given text.
    pub fn user(content: impl Into<String>) -> Self {
        Self::text_only("user", content.into())
    }
    /// Builds an `"assistant"` message; both the text and the tool calls are optional.
    pub fn assistant(content: Option<String>, tool_calls: Option<Vec<ToolCall>>) -> Self {
        Self {
            role: String::from("assistant"),
            content,
            tool_calls,
            name: None,
            tool_call_id: None,
        }
    }
    /// Builds a `"tool"` message with the given text, tagged with `tool_call_id`.
    pub fn tool(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
        let mut message = Self::text_only("tool", content.into());
        message.tool_call_id = Some(tool_call_id.into());
        message
    }
    /// Returns the message with its `name` field set.
    pub fn with_name(self, name: impl Into<String>) -> Self {
        Self {
            name: Some(name.into()),
            ..self
        }
    }
    /// Builds a `"developer"` message with the given text.
    pub fn developer(content: impl Into<String>) -> Self {
        Self::text_only("developer", content.into())
    }
    /// Returns the message with its `tool_calls` replaced by the given list
    /// (the role is left untouched).
    pub fn with_tool_calls(self, tool_calls: Vec<ToolCall>) -> Self {
        Self {
            tool_calls: Some(tool_calls),
            ..self
        }
    }
 }
 /// A tool call within an assistant message.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ToolCall {
    /// Identifier of this tool call.
    pub id: String,
    /// Call type; (de)serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// Name and arguments of the function being called.
    pub function: ToolCallFunction,
 }
 /// Function details within a tool call.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ToolCallFunction {
    /// Name of the function to call.
    pub name: String,
    /// Arguments carried as a string rather than as a parsed JSON value.
    pub arguments: String,
 }
 /// Chat completion request body (serialized to JSON for the HTTP API).
 ///
 /// `model` and `messages` are always serialized; every `Option` field is
 /// omitted from the JSON when it is `None`. The type is serialize-only.
 #[derive(Debug, Clone, Serialize)]
 pub struct ChatCompletionRequest {
    /// Model identifier sent in the request.
    pub model: String,
    /// Conversation messages, in the order they are sent.
    pub messages: Vec<ChatRequestMessage>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f64>,
    /// Streaming flag; see `with_stream`, which sets it to `Some(true)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
    /// Tool definitions passed through as raw JSON values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<serde_json::Value>>,
    /// Tool choice passed through as a raw JSON value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<serde_json::Value>,
 }
 impl ChatCompletionRequest {
    /// Returns the request with `stream` set to `Some(true)`; all other fields are kept.
    pub fn with_stream(self) -> Self {
        Self {
            stream: Some(true),
            ..self
        }
    }
 }
 /// Reasoning effort level for supported models.
 ///
 /// Variants are serialized in lowercase (`High` becomes `"high"`). Only one
 /// level is declared here.
 #[derive(Debug, Clone, Serialize)]
 #[serde(rename_all = "lowercase")]
 pub enum ReasoningEffort {
    High,
 }
 // ── Response types (non-streaming) ──
 /// Chat completion response (non-streaming). Deserialize-only from the API JSON.
 ///
 /// `choices` is required; the `#[serde(default)]` fields become `None` when
 /// they are missing from the payload.
 #[derive(Debug, Clone, Deserialize)]
 pub struct ChatCompletionResponse {
    #[serde(default)]
    pub id: Option<String>,
    #[serde(default)]
    pub model: Option<String>,
    pub choices: Vec<Choice>,
    /// Token usage, when the payload includes it.
    #[serde(default)]
    pub usage: Option<Usage>,
 }
 /// One entry of `ChatCompletionResponse::choices`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct Choice {
    /// Index of this choice as reported in the payload.
    pub index: u32,
    /// The message produced for this choice.
    pub message: ResponseMessage,
    /// Finish reason string, when the payload provides one.
    pub finish_reason: Option<String>,
 }
 /// Message payload carried by a non-streaming `Choice`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct ResponseMessage {
    pub role: Option<String>,
    /// Text content, if any.
    pub content: Option<String>,
    /// Tool calls attached to the message; `None` when the field is absent.
    #[serde(default)]
    pub tool_calls: Option<Vec<ResponseToolCall>>,
 }
 /// A tool call as it appears in a non-streaming response message.
 #[derive(Debug, Clone, Deserialize)]
 pub struct ResponseToolCall {
    pub id: String,
    /// Read from the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    pub function: ResponseToolCallFunction,
 }
 /// Function name and arguments of a `ResponseToolCall`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct ResponseToolCallFunction {
    pub name: String,
    /// Arguments carried as a string rather than as a parsed JSON value.
    pub arguments: String,
 }
 /// Token usage from the response.
 ///
 /// Both fields are required. Each accepts an alternative key on input via
 /// `alias`, so payloads using `input_tokens` / `output_tokens` also parse.
 #[derive(Debug, Clone, Deserialize)]
 pub struct Usage {
    /// Read from `prompt_tokens`, or from `input_tokens` as an alias.
    #[serde(rename = "prompt_tokens", alias = "input_tokens")]
    pub prompt_tokens: u64,
    /// Read from `completion_tokens`, or from `output_tokens` as an alias.
    #[serde(rename = "completion_tokens", alias = "output_tokens")]
    pub completion_tokens: u64,
 }
 // ── Streaming types ──
 /// A chunk from a streaming chat completion (SSE `data:` lines).
 ///
 /// `choices` is required; `id` and `model` become `None` when missing.
 #[derive(Debug, Clone, Deserialize)]
 pub struct StreamChunk {
    #[serde(default)]
    pub id: Option<String>,
    #[serde(default)]
    pub model: Option<String>,
    pub choices: Vec<StreamChoice>,
 }
 /// One entry of `StreamChunk::choices`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct StreamChoice {
    /// Incremental payload carried by this chunk.
    pub delta: Delta,
    /// Finish reason string, when the chunk provides one.
    pub finish_reason: Option<String>,
    pub index: u32,
 }
 /// Incremental message payload of a streaming choice.
 ///
 /// Every field is optional and becomes `None` when absent from the chunk.
 #[derive(Debug, Clone, Deserialize)]
 pub struct Delta {
    #[serde(default)]
    pub role: Option<String>,
    #[serde(default)]
    pub content: Option<String>,
    #[serde(default)]
    pub tool_calls: Option<Vec<StreamToolCall>>,
 }
 /// A tool-call fragment inside a streaming `Delta`.
 ///
 /// Only `index` is required; `id`, `type` and `function` may each be absent.
 #[derive(Debug, Clone, Deserialize)]
 pub struct StreamToolCall {
    pub index: u32,
    #[serde(default)]
    pub id: Option<String>,
    /// Read from the JSON key `type`; `None` when missing.
    #[serde(rename = "type", default)]
    pub type_: Option<String>,
    pub function: Option<StreamToolCallFunction>,
 }
 /// Function fragment of a `StreamToolCall`; both parts are optional.
 #[derive(Debug, Clone, Deserialize)]
 pub struct StreamToolCallFunction {
    #[serde(default)]
    pub name: Option<String>,
    /// Argument text carried by this fragment, if any.
    #[serde(default)]
    pub arguments: Option<String>,
 }
--- a/libs/agent/compact/auth_fetch.rs
+++ b/libs/agent/compact/auth_fetch.rs
@ -1,39 +0,0 @@
 use crate::AgentError;
 use models::Expr;
 use models::rooms::room_message::{
    Column as RmCol, Entity as RoomMessage, Model as RoomMessageModel,
 };
 use sea_orm::*;
 impl super::CompactService {
    /// Loads the messages of `room_id` on behalf of `requester_id`.
    ///
    /// Rows are returned only when a `room_user_state` row or a `room_access`
    /// row exists for this room/requester pair; otherwise the result is empty.
    /// At most 10,000 rows are returned, ordered by ascending `seq`. Database
    /// failures are reported as `AgentError::Internal`.
    pub async fn fetch_room_messages_secure(
        &self,
        room_id: uuid::Uuid,
        requester_id: uuid::Uuid,
    ) -> Result<Vec<RoomMessageModel>, AgentError> {
        use models::rooms::room_access::Column as AccessCol;
        use models::rooms::room_user_state::Column as StateCol;
        use models::rooms::{RoomAccess, RoomUserState};
        // EXISTS(room_user_state WHERE room = ? AND user = ?)
        let has_user_state = Expr::exists(
            RoomUserState::find()
                .filter(StateCol::Room.eq(room_id))
                .filter(StateCol::User.eq(requester_id))
                .into_query(),
        );
        // EXISTS(room_access WHERE room = ? AND user = ?)
        let has_room_access = Expr::exists(
            RoomAccess::find()
                .filter(AccessCol::Room.eq(room_id))
                .filter(AccessCol::User.eq(requester_id))
                .into_query(),
        );
        // Either relation is sufficient for the requester to read the room.
        let requester_may_read = Condition::any().add(has_user_state).add(has_room_access);
        let rows = RoomMessage::find()
            .filter(RmCol::Room.eq(room_id))
            .filter(requester_may_read)
            .order_by_asc(RmCol::Seq)
            .limit(10000)
            .all(&self.db)
            .await;
        rows.map_err(|e| AgentError::Internal(e.to_string()))
    }
 }
--- a/libs/agent/compact/helpers.rs
+++ b/libs/agent/compact/helpers.rs
@ -1,45 +0,0 @@
 use super::types::{CompactSummary, MessageSummary};
 /// Renders messages as newline-separated lines of the form
 /// `[send_at] sender: content`, where `sender` comes from `sender_mapper`.
 ///
 /// An empty slice yields an empty string.
 pub fn messages_to_text<F>(
    messages: &[models::rooms::room_message::Model],
    sender_mapper: F,
 ) -> String
 where
    F: Fn(&models::rooms::room_message::Model) -> String,
 {
    let mut lines = Vec::with_capacity(messages.len());
    for message in messages {
        let sender = sender_mapper(message);
        lines.push(format!(
            "[{}] {}: {}",
            message.send_at, sender, message.content
        ));
    }
    lines.join("\n")
 }
 /// Renders retained message summaries as newline-separated lines of the form
 /// `[send_at] sender_name: content`. An empty slice yields an empty string.
 pub fn retained_as_text(retained: &[MessageSummary]) -> String {
    let mut text = String::new();
    for (position, entry) in retained.iter().enumerate() {
        if position > 0 {
            text.push('\n');
        }
        text.push_str(&format!(
            "[{}] {}: {}",
            entry.send_at, entry.sender_name, entry.content
        ));
    }
    text
 }
 /// Formats a compaction result as markdown-style text.
 ///
 /// With an empty `summary.summary` only a "Recent conversation" section with
 /// the retained messages is produced; otherwise the summary text comes first
 /// under "Earlier conversation", followed by the retained messages.
 pub fn summary_content(summary: &CompactSummary) -> String {
    let retained_count = summary.retained.len();
    let retained_text = retained_as_text(&summary.retained);
    if summary.summary.is_empty() {
        return format!(
            "## Recent conversation ({} messages)\n\n{}",
            retained_count, retained_text
        );
    }
    format!(
        "## Earlier conversation ({} messages summarised)\n{}\n\n## Most recent {} messages\n\n{}",
        summary.messages_compressed, summary.summary, retained_count, retained_text
    )
 }
--- a/libs/agent/compact/mod.rs
+++ b/libs/agent/compact/mod.rs
@ -1,56 +0,0 @@
 //! Context compaction for AI sessions and room message history.
 pub mod auth_fetch;
 pub mod helpers;
 pub mod room_compactor;
 pub mod summarizer;
 pub mod types;
 use sea_orm::DatabaseConnection;
 pub use types::{
    CompactConfig, CompactLevel, CompactSummary, MessageSummary, RoomCompactContext,
    RoomCompactRecord, ThresholdResult,
 };
 /// Service that compacts room/session message history.
 ///
 /// Holds the database handle, the AI client configuration, the model name
 /// and an optional per-model context limit.
 #[derive(Clone)]
 pub struct CompactService {
    /// Database connection used for message and summary queries.
    db: DatabaseConnection,
    /// Configuration for the AI client.
    ai_client_config: crate::client::AiClientConfig,
    /// Name of the model this service instance is bound to.
    model: String,
    /// Context limit for `model`; `None` when unset (zero is normalized to `None`
    /// by `with_model_context_limit`).
    model_context_limit: Option<usize>,
 }
 impl CompactService {
    /// Creates a service bound to `model`, with no model context limit set.
    pub fn new(
        db: DatabaseConnection,
        ai_client_config: crate::client::AiClientConfig,
        model: String,
    ) -> Self {
        let model_context_limit = None;
        Self {
            db,
            ai_client_config,
            model,
            model_context_limit,
        }
    }
    /// Returns a copy of this service bound to a different model name; the
    /// database handle, client configuration and context limit are carried over.
    pub fn for_model(&self, model: impl Into<String>) -> Self {
        let mut derived = self.clone();
        derived.model = model.into();
        derived
    }
    /// Sets the model context limit; `None` and `Some(0)` both clear it.
    pub fn with_model_context_limit(mut self, model_context_limit: Option<usize>) -> Self {
        self.model_context_limit = match model_context_limit {
            Some(limit) if limit > 0 => Some(limit),
            _ => None,
        };
        self
    }
    /// Returns a copy bound to the given model entry, using its name and its
    /// `context_length` (negative values are clamped to zero, which clears the limit).
    pub fn for_model_entry(&self, model: &models::agents::model::Model) -> Self {
        let context_limit = model.context_length.max(0) as usize;
        self.for_model(model.name.clone())
            .with_model_context_limit(Some(context_limit))
    }
 }
--- a/libs/agent/compact/room_compactor.rs
+++ b/libs/agent/compact/room_compactor.rs
@ -1,422 +0,0 @@
 use models::rooms::room_message::{
    Column as RmCol, Entity as RoomMessage, Model as RoomMessageModel,
 };
 use sea_orm::ColumnTrait;
 use sea_orm::{ConnectionTrait, EntityTrait, QueryFilter, QueryOrder, QuerySelect};
 use crate::compact::types::{CompactConfig, CompactLevel, RoomCompactContext, RoomCompactRecord};
 use crate::tokent::resolve_usage;
 use crate::{AgentError, CompactSummary, MessageSummary};
 impl super::CompactService {
    /// Fetches the most recent compaction record of a room, if one exists.
    ///
    /// "Most recent" means highest `to_seq`, ties broken by latest `created_at`.
    /// Entries of `source_message_ids` that are not strings or not valid UUIDs
    /// are skipped; a non-array value yields an empty list. Query and column
    /// decoding failures are reported as `AgentError::Internal`.
    pub async fn latest_room_compact_record(
        &self,
        room_id: uuid::Uuid,
    ) -> Result<Option<RoomCompactRecord>, AgentError> {
        // Maps any displayable error onto the service's internal error variant.
        fn internal<E: std::fmt::Display>(err: E) -> AgentError {
            AgentError::Internal(err.to_string())
        }
        let stmt = sea_orm::Statement::from_sql_and_values(
            sea_orm::DbBackend::Postgres,
            "SELECT id, room, from_seq, to_seq, summary, message_count, source_message_ids, created_at \
             FROM room_compact_summary WHERE room = $1 ORDER BY to_seq DESC, created_at DESC LIMIT 1",
            vec![room_id.into()],
        );
        let row = match self.db.query_one_raw(stmt).await.map_err(internal)? {
            Some(found) => found,
            None => return Ok(None),
        };
        let source_json: serde_json::Value = row
            .try_get("", "source_message_ids")
            .map_err(internal)?;
        let source_message_ids = match source_json.as_array() {
            Some(entries) => entries
                .iter()
                .filter_map(|entry| entry.as_str())
                .filter_map(|text| uuid::Uuid::parse_str(text).ok())
                .collect::<Vec<_>>(),
            None => Vec::new(),
        };
        let record = RoomCompactRecord {
            id: row.try_get("", "id").map_err(internal)?,
            room_id: row.try_get("", "room").map_err(internal)?,
            from_seq: row.try_get("", "from_seq").map_err(internal)?,
            to_seq: row.try_get("", "to_seq").map_err(internal)?,
            summary: row.try_get("", "summary").map_err(internal)?,
            message_count: row.try_get("", "message_count").map_err(internal)?,
            source_message_ids,
            created_at: row.try_get("", "created_at").map_err(internal)?,
        };
        Ok(Some(record))
    }
    /// Inserts a new row into `room_compact_summary` and returns the matching record.
    ///
    /// A fresh UUID is generated for the row, `created_at` and `updated_at` are
    /// both set to the current UTC time, and `message_count` is the number of
    /// source message ids. Database failures are reported as `AgentError::Internal`.
    async fn insert_room_compact_record(
        &self,
        room_id: uuid::Uuid,
        from_seq: i64,
        to_seq: i64,
        summary: &str,
        source_message_ids: &[uuid::Uuid],
    ) -> Result<RoomCompactRecord, AgentError> {
        let id = uuid::Uuid::new_v4();
        let now = chrono::Utc::now();
        let message_count = source_message_ids.len() as i32;
        // The ids are stored as a JSON array of their string forms.
        let id_strings: Vec<serde_json::Value> = source_message_ids
            .iter()
            .map(|source_id| source_id.to_string())
            .map(serde_json::Value::String)
            .collect();
        let source_json = serde_json::Value::Array(id_strings);
        let values = vec![
            id.into(),
            room_id.into(),
            from_seq.into(),
            to_seq.into(),
            summary.to_string().into(),
            message_count.into(),
            source_json.into(),
            now.into(),
            now.into(),
        ];
        let stmt = sea_orm::Statement::from_sql_and_values(
            sea_orm::DbBackend::Postgres,
            "INSERT INTO room_compact_summary \
             (id, room, from_seq, to_seq, summary, message_count, source_message_ids, created_at, updated_at) \
             VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)",
            values,
        );
        if let Err(db_err) = self.db.execute_raw(stmt).await {
            return Err(AgentError::Internal(db_err.to_string()));
        }
        Ok(RoomCompactRecord {
            id,
            room_id,
            from_seq,
            to_seq,
            summary: summary.to_string(),
            message_count,
            source_message_ids: source_message_ids.to_vec(),
            created_at: now,
        })
    }
    /// Filters, orders and de-duplicates messages before summarisation.
    ///
    /// Keeps only non-revoked text messages with non-blank content, sorts them
    /// by `(seq, send_at)`, and then drops every later message whose sender type,
    /// sender id and whitespace-collapsed, lower-cased content match an earlier one.
    fn clean_dedupe_sort_messages(mut messages: Vec<RoomMessageModel>) -> Vec<RoomMessageModel> {
        let is_usable = |m: &RoomMessageModel| {
            if m.revoked.is_some() {
                return false;
            }
            if m.content.trim().is_empty() {
                return false;
            }
            matches!(m.content_type, models::rooms::MessageContentType::Text)
        };
        messages.retain(is_usable);
        messages.sort_by_key(|m| (m.seq, m.send_at));
        // `retain` visits elements in order, so the first occurrence of a key wins.
        let mut seen_keys = std::collections::HashSet::new();
        messages.retain(|m| {
            let words: Vec<&str> = m.content.split_whitespace().collect();
            let collapsed = words.join(" ").to_lowercase();
            seen_keys.insert(format!("{}:{:?}:{}", m.sender_type, m.sender_id, collapsed))
        });
        messages
    }
    /// Returns how many recent messages to keep verbatim.
    ///
    /// With `config.auto_level` the level is chosen by `CompactLevel::auto_select`
    /// from the token estimate and threshold; otherwise `config.default_level` is used.
    fn resolve_retain_count(config: CompactConfig, estimated_tokens: usize) -> usize {
        if !config.auto_level {
            return config.default_level.retain_count();
        }
        CompactLevel::auto_select(estimated_tokens, config.token_threshold).retain_count()
    }
    /// Builds the context for a room, compacting incrementally when needed.
    ///
    /// Messages newer than the latest stored compaction record are fetched (with
    /// the requester's access check), cleaned and de-duplicated. If the estimated
    /// token count of "previous summary + new messages" reaches
    /// `config.token_threshold` and there are more new messages than the retain
    /// count, the older part is summarised, a new compaction record is inserted
    /// and the result has `compacted: true`. Otherwise the latest stored summary
    /// (if any) is returned with at most the 50 most recent new messages.
    ///
    /// # Errors
    /// Propagates `AgentError` from the database queries, the user-name lookup
    /// and the summarisation call.
    pub async fn prepare_room_compact_context(
        &self,
        room_id: uuid::Uuid,
        requester_id: uuid::Uuid,
        user_names: Option<std::collections::HashMap<uuid::Uuid, String>>,
        config: CompactConfig,
    ) -> Result<RoomCompactContext, AgentError> {
        let latest = self.latest_room_compact_record(room_id).await?;
        let cutoff_seq = latest.as_ref().map(|r| r.to_seq);
        let previous_summary = latest.as_ref().map(|r| r.summary.as_str());
        // NOTE(review): the cutoff is applied in memory after the fetch, and
        // `fetch_room_messages_secure` returns at most 10,000 rows in ascending
        // `seq` order — messages beyond that cap would not be seen here; confirm.
        let messages = self
            .fetch_room_messages_secure(room_id, requester_id)
            .await?;
        // Keep only messages after the last compacted sequence number.
        let messages = messages
            .into_iter()
            .filter(|m| cutoff_seq.map(|seq| m.seq > seq).unwrap_or(true))
            .collect::<Vec<_>>();
        let messages = Self::clean_dedupe_sort_messages(messages);
        let user_ids: Vec<uuid::Uuid> = messages
            .iter()
            .filter_map(|m| m.sender_id)
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();
        // A caller-supplied name map is used as-is; otherwise names are looked up.
        let user_name_map = match user_names {
            Some(map) => map,
            None => self.get_user_name_map(&user_ids).await?,
        };
        // Falls back to the sender type when the sender has no id or no known name.
        let sender_mapper = |m: &RoomMessageModel| {
            if let Some(user_id) = m.sender_id {
                if let Some(username) = user_name_map.get(&user_id) {
                    return username.clone();
                }
            }
            m.sender_type.to_string()
        };
        let incremental_text = crate::compact::helpers::messages_to_text(&messages, sender_mapper);
        // The estimate covers the previous summary (when non-empty) plus the new messages.
        let estimate_input = match previous_summary {
            Some(summary) if !summary.is_empty() => format!("{}\n{}", summary, incremental_text),
            _ => incremental_text.clone(),
        };
        // If token counting fails, approximate with byte length / 4.
        let estimated_tokens = crate::tokent::count_message_text(&estimate_input, &self.model)
            .unwrap_or_else(|_| estimate_input.len() / 4);
        let retain_count = Self::resolve_retain_count(config, estimated_tokens);
        if estimated_tokens >= config.token_threshold && messages.len() > retain_count {
            // Everything except the last `retain_count` messages is summarised.
            let split_index = messages.len().saturating_sub(retain_count);
            let (to_summarize, retained_messages) = messages.split_at(split_index);
            let from_seq = to_summarize
                .first()
                .map(|m| m.seq)
                .unwrap_or(cutoff_seq.unwrap_or(0) + 1);
            let to_seq = to_summarize.last().map(|m| m.seq).unwrap_or(from_seq);
            let source_ids: Vec<uuid::Uuid> = to_summarize.iter().map(|m| m.id).collect();
            // The token usage returned by the summariser is not used here.
            let (summary, _usage) = self
                .summarize_room_increment(previous_summary, to_summarize, config.max_summary_tokens)
                .await?;
            let record = self
                .insert_room_compact_record(room_id, from_seq, to_seq, &summary, &source_ids)
                .await?;
            let retained = retained_messages
                .iter()
                .map(|m| Self::message_to_summary(m, &user_name_map))
                .collect();
            return Ok(RoomCompactContext {
                room_id,
                cutoff_seq: Some(record.to_seq),
                summary: Some(record.summary),
                retained,
                estimated_tokens,
                compacted: true,
            });
        }
        // No compaction: return the last 50 new messages in chronological order.
        let retained = messages
            .iter()
            .rev()
            .take(50)
            .collect::<Vec<_>>()
            .into_iter()
            .rev()
            .map(|m| Self::message_to_summary(m, &user_name_map))
            .collect();
        Ok(RoomCompactContext {
            room_id,
            cutoff_seq,
            summary: latest.map(|r| r.summary),
            retained,
            estimated_tokens,
            compacted: false,
        })
    }
    pub async fn compact_room(
        &self,
        room_id: uuid::Uuid,
        level: CompactLevel,
        user_names: Option<std::collections::HashMap<uuid::Uuid, String>>,
        requester_id: uuid::Uuid,
        context_window_tokens: i32,
        compaction_max_summary_ratio: f32,
    ) -> Result<CompactSummary, AgentError> {
        let messages = self
            .fetch_room_messages_secure(room_id, requester_id)
            .await?;
        if messages.is_empty() {
            let room_exists = models::rooms::room::Entity::find_by_id(room_id)
                .one(&self.db)
                .await
                .map_err(|e| AgentError::Internal(e.to_string()))?
                .is_some();
            if room_exists {
                return Err(AgentError::Internal("Access denied or room empty".into()));
            } else {
                return Err(AgentError::Internal("Room not found".into()));
            }
        }
        let user_ids: Vec<uuid::Uuid> = messages
            .iter()
            .filter_map(|m| m.sender_id)
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();
        let user_name_map = match user_names {
            Some(map) => map,
            None => self.get_user_name_map(&user_ids).await?,
        };
        if messages.len() <= level.retain_count() {
            let retained: Vec<MessageSummary> = messages
                .iter()
                .map(|m| Self::message_to_summary(m, &user_name_map))
                .collect();
            return Ok(CompactSummary {
                session_id: uuid::Uuid::new_v4(),
                room_id,
                retained,
                summary: String::new(),
                compacted_at: chrono::Utc::now(),
                messages_compressed: 0,
                usage: None,
            });
        }
        let retain_count = level.retain_count();
        let split_index = messages.len().saturating_sub(retain_count);
        let (to_summarize, retained_messages) = messages.split_at(split_index);
        let retained: Vec<MessageSummary> = retained_messages
            .iter()
            .map(|m| Self::message_to_summary(m, &user_name_map))
            .collect();
        let max_summary_tokens = CompactConfig::summary_token_budget(
            context_window_tokens.max(0) as usize,
            compaction_max_summary_ratio,
        );
        let (summary, remote_usage) = self
            .summarize_messages(to_summarize, max_summary_tokens)
            .await?;
        let summarized_text = to_summarize
            .iter()
            .map(|m| m.content.as_str())
            .collect::<Vec<_>>()
            .join("\n");
        let usage = resolve_usage(remote_usage, &self.model, &summarized_text, &summary);
        Ok(CompactSummary {
            session_id: uuid::Uuid::new_v4(),
            room_id,
            retained,
            summary,
            compacted_at: chrono::Utc::now(),
            messages_compressed: to_summarize.len(),
            usage: Some(usage),
        })
    }
    /// Compacts the messages stored under `session_id`.
    ///
    /// Messages are loaded from the room-message table where the room column
    /// equals `session_id` (at most 10,000, ascending `seq`). The last
    /// `level.retain_count()` messages are kept verbatim and the rest summarised;
    /// with no more messages than that, all are retained and the summary is empty.
    /// The result always carries a nil `room_id`.
    ///
    /// NOTE(review): unlike `compact_room`, this query applies no requester
    /// access check — confirm callers authorise access beforehand.
    ///
    /// # Errors
    /// Returns `AgentError::Internal("session has no messages")` when nothing is
    /// found; database, name-lookup and summarisation errors are propagated.
    pub async fn compact_session(
        &self,
        session_id: uuid::Uuid,
        level: CompactLevel,
        user_names: Option<std::collections::HashMap<uuid::Uuid, String>>,
        context_window_tokens: i32,
        compaction_max_summary_ratio: f32,
    ) -> Result<CompactSummary, AgentError> {
        let query = RoomMessage::find()
            .filter(RmCol::Room.eq(session_id))
            .order_by_asc(RmCol::Seq)
            .limit(10000);
        let messages: Vec<RoomMessageModel> = query
            .all(&self.db)
            .await
            .map_err(|e| AgentError::Internal(e.to_string()))?;
        if messages.is_empty() {
            return Err(AgentError::Internal("session has no messages".into()));
        }
        let sender_ids: Vec<uuid::Uuid> = {
            let unique: std::collections::HashSet<_> =
                messages.iter().filter_map(|m| m.sender_id).collect();
            unique.into_iter().collect()
        };
        let user_name_map = if let Some(provided) = user_names {
            provided
        } else {
            self.get_user_name_map(&sender_ids).await?
        };
        // Converts a run of messages into summaries using the resolved names.
        let to_summaries = |batch: &[RoomMessageModel]| -> Vec<MessageSummary> {
            batch
                .iter()
                .map(|m| Self::message_to_summary(m, &user_name_map))
                .collect()
        };
        let retain_count = level.retain_count();
        if messages.len() <= retain_count {
            return Ok(CompactSummary {
                session_id,
                room_id: uuid::Uuid::nil(),
                retained: to_summaries(&messages),
                summary: String::new(),
                compacted_at: chrono::Utc::now(),
                messages_compressed: 0,
                usage: None,
            });
        }
        let (to_summarize, tail) = messages.split_at(messages.len().saturating_sub(retain_count));
        let retained = to_summaries(tail);
        let max_summary_tokens = CompactConfig::summary_token_budget(
            context_window_tokens.max(0) as usize,
            compaction_max_summary_ratio,
        );
        let (summary, remote_usage) = self
            .summarize_messages(to_summarize, max_summary_tokens)
            .await?;
        // Raw contents of the summarised messages, one per line, for usage resolution.
        let mut summarized_text = String::new();
        for (position, m) in to_summarize.iter().enumerate() {
            if position > 0 {
                summarized_text.push('\n');
            }
            summarized_text.push_str(m.content.as_str());
        }
        let usage = resolve_usage(remote_usage, &self.model, &summarized_text, &summary);
        Ok(CompactSummary {
            session_id,
            room_id: uuid::Uuid::nil(),
            retained,
            summary,
            compacted_at: chrono::Utc::now(),
            messages_compressed: to_summarize.len(),
            usage: Some(usage),
        })
    }
 }
--- a/libs/agent/compact/summarizer.rs
+++ b/libs/agent/compact/summarizer.rs
@ -1,513 +0,0 @@
 use models::rooms::room_message::Model as RoomMessageModel;
 use models::users::user::{Column as UserCol, Entity as User};
 use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
 use crate::AgentError;
 use crate::client::call_with_params;
 use crate::client::types::ChatRequestMessage;
 use crate::compact::types::{CompactConfig, MessageSummary};
 use crate::tokent::{TokenUsage, count_message_text};
 /// Context window size, in tokens, assumed when the service has no explicit model limit.
 const DEFAULT_MODEL_CONTEXT_LIMIT: usize = 128_000;
 /// Numerator of the share of the context window that a prompt may occupy (85 / 100).
 const MODEL_INPUT_RATIO_NUMERATOR: usize = 85;
 /// Denominator of the share of the context window that a prompt may occupy.
 const MODEL_INPUT_RATIO_DENOMINATOR: usize = 100;
 /// Floor applied to the input-derived cap on a single round's summary budget.
 const MIN_ROUND_SUMMARY_TOKENS: usize = 64;
 /// Selects which prompt template and sampling temperature a summary request uses.
 #[derive(Clone, Copy)]
 enum SummaryKind {
    /// Conversation summary; used by `summarize_messages`.
    Conversation,
    /// Incremental room summary; used by `summarize_room_increment`.
    RoomIncrement,
 }
 impl super::CompactService {
    /// Produces a room summary covering `messages`, optionally continuing an earlier summary.
    ///
    /// Every message is rendered as `[send_at] sender: content`. The sender is the username
    /// looked up for `sender_id`; messages without a sender id, or whose id has no matching
    /// user, are labelled with their sender type instead. The rendered lines are summarised
    /// with the room-increment prompts together with `previous_summary`.
    ///
    /// Returns the summary text and the accumulated token usage, if any was reported.
    pub async fn summarize_room_increment(
        &self,
        previous_summary: Option<&str>,
        messages: &[RoomMessageModel],
        max_summary_tokens: usize,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        // Deduplicate sender ids so each user is looked up only once.
        let mut distinct_senders = std::collections::HashSet::new();
        for message in messages {
            if let Some(sender_id) = message.sender_id {
                distinct_senders.insert(sender_id);
            }
        }
        let sender_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
        let names = self.get_user_name_map(&sender_ids).await?;
        let mut rendered = Vec::with_capacity(messages.len());
        for message in messages {
            let display_name = match message.sender_id.and_then(|id| names.get(&id)) {
                Some(name) => name.clone(),
                None => message.sender_type.to_string(),
            };
            rendered.push(format!(
                "[{}] {}: {}",
                message.send_at, display_name, message.content
            ));
        }
        self.summarize_blocks_with_optional_previous(
            rendered,
            previous_summary,
            max_summary_tokens,
            SummaryKind::RoomIncrement,
        )
        .await
    }
    /// Summarises `messages` as a conversation, with no previous summary to merge.
    ///
    /// Each message becomes a `[send_at] sender: content` line, where the sender is the
    /// username found for `sender_id` or, failing that, the message's sender type.
    ///
    /// Returns the summary text and the accumulated token usage, if any was reported.
    pub async fn summarize_messages(
        &self,
        messages: &[RoomMessageModel],
        max_summary_tokens: usize,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        let distinct_senders: std::collections::HashSet<uuid::Uuid> = messages
            .iter()
            .filter_map(|message| message.sender_id)
            .collect();
        let sender_ids = distinct_senders.into_iter().collect::<Vec<_>>();
        let names = self.get_user_name_map(&sender_ids).await?;
        let rendered: Vec<String> = messages
            .iter()
            .map(|message| {
                let display_name = message
                    .sender_id
                    .and_then(|id| names.get(&id))
                    .map_or_else(|| message.sender_type.to_string(), String::clone);
                format!(
                    "[{}] {}: {}",
                    message.send_at, display_name, message.content
                )
            })
            .collect();
        self.summarize_blocks_with_optional_previous(
            rendered,
            None,
            max_summary_tokens,
            SummaryKind::Conversation,
        )
        .await
    }
    /// Builds the `MessageSummary` for a single room message.
    ///
    /// `sender_name` is the username stored in `user_name_map` for the message's sender id,
    /// or the sender type's display string when there is no sender id or no map entry.
    /// `tool_call_id` is always `None` here.
    // NOTE(review): the `From<RoomMessageModel>` conversion parses `tool_call_id` out of the
    // content instead of leaving it empty — confirm the difference is intended.
    pub fn message_to_summary(
        m: &RoomMessageModel,
        user_name_map: &std::collections::HashMap<uuid::Uuid, String>,
    ) -> MessageSummary {
        let sender_name = m
            .sender_id
            .and_then(|user_id| user_name_map.get(&user_id))
            .cloned()
            .unwrap_or_else(|| m.sender_type.to_string());
        MessageSummary {
            sender_name,
            id: m.id,
            sender_type: m.sender_type.clone(),
            sender_id: m.sender_id,
            content: m.content.clone(),
            content_type: m.content_type.clone(),
            tool_call_id: None,
            send_at: m.send_at,
        }
    }
    /// Loads the usernames for `user_ids`.
    ///
    /// Returns a map from user id to username containing only the users that were found.
    /// An empty `user_ids` slice yields an empty map without touching the database.
    ///
    /// # Errors
    /// A failed query is reported as `AgentError::Internal` carrying the database error text.
    pub async fn get_user_name_map(
        &self,
        user_ids: &[uuid::Uuid],
    ) -> Result<std::collections::HashMap<uuid::Uuid, String>, AgentError> {
        if user_ids.is_empty() {
            return Ok(std::collections::HashMap::new());
        }
        let found = User::find()
            .filter(UserCol::Uid.is_in(user_ids.to_vec()))
            .all(&self.db)
            .await
            .map_err(|e| AgentError::Internal(e.to_string()))?;
        Ok(found
            .into_iter()
            .map(|user| (user.uid, user.username))
            .collect())
    }
    /// Summarises `blocks` and folds a non-blank `previous_summary` into the result.
    ///
    /// The blocks are first split into chunks whose prompts fit the model input budget; each
    /// chunk is summarised on its own. The trimmed previous summary (if any) is placed in
    /// front of those partial summaries and everything is merged into one final summary.
    ///
    /// Returns `(String::new(), None)` when there is nothing to work with, i.e. `blocks` is
    /// empty and no usable previous summary was given. When only one summary exists it is
    /// returned as-is without a merge request. The usage is `Some` only if at least one model
    /// call reported token usage.
    ///
    /// # Errors
    /// Propagates chunking failures and model-call failures.
    async fn summarize_blocks_with_optional_previous(
        &self,
        blocks: Vec<String>,
        previous_summary: Option<&str>,
        max_summary_tokens: usize,
        kind: SummaryKind,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        let final_budget = Self::final_summary_budget(max_summary_tokens);
        let input_budget = self.safe_model_input_budget();
        let round_budget = Self::round_summary_budget(final_budget, input_budget);
        let mut total_usage = TokenUsage::default();
        let mut has_usage = false;
        let mut partial_summaries = Vec::new();
        // The previous summary goes first so chronology is preserved in the merge.
        if let Some(previous) = previous_summary
            .map(str::trim)
            .filter(|summary| !summary.is_empty())
        {
            partial_summaries.push(previous.to_string());
        }
        // An empty block list would otherwise be joined into a single empty chunk and the
        // model would be asked to summarise nothing, so skip chunking entirely in that case.
        if !blocks.is_empty() {
            let fitted_chunks =
                self.split_blocks_to_fit(blocks, input_budget, round_budget, kind, false)?;
            for chunk in fitted_chunks {
                let prompt = self.build_prompt(kind, false, &chunk, round_budget);
                let (summary, usage) = self
                    .invoke_summary_prompt(&prompt, round_budget, Self::temperature_for(kind))
                    .await?;
                Self::accumulate_usage(&mut total_usage, &mut has_usage, usage);
                partial_summaries.push(summary);
            }
        }
        if partial_summaries.is_empty() {
            return Ok((String::new(), None));
        }
        let final_summary = if partial_summaries.len() == 1 {
            // A lone summary needs no merge round.
            partial_summaries.remove(0)
        } else {
            self.merge_summary_rounds(
                partial_summaries,
                final_budget,
                round_budget,
                kind,
                &mut total_usage,
                &mut has_usage,
            )
            .await?
        };
        Ok((final_summary, has_usage.then_some(total_usage)))
    }
    /// Repeatedly merges adjacent summaries pairwise until a single summary remains.
    ///
    /// In each round neighbouring summaries are paired; an unpaired trailing summary is
    /// carried into the next round unchanged. A round that starts with at most two summaries
    /// uses `final_budget`, earlier rounds use `round_budget`. A pair whose merge prompt does
    /// not fit the input budget is split further and produces more than one output.
    /// Token usage reported by the model is added to `total_usage` / `has_usage`.
    ///
    /// # Errors
    /// Propagates chunking and model-call failures; returns `AgentError::Internal` if no
    /// summary is left at the end.
    // NOTE(review): a round in which every pair has to be split yields as many summaries as
    // it consumed, so termination relies on the model keeping its output within the
    // requested budget — confirm this is acceptable.
    async fn merge_summary_rounds(
        &self,
        mut summaries: Vec<String>,
        final_budget: usize,
        round_budget: usize,
        kind: SummaryKind,
        total_usage: &mut TokenUsage,
        has_usage: &mut bool,
    ) -> Result<String, AgentError> {
        let input_budget = self.safe_model_input_budget();
        while summaries.len() > 1 {
            let current_budget = if summaries.len() > 2 {
                round_budget
            } else {
                final_budget
            };
            let mut merged = Vec::new();
            for group in summaries.chunks(2) {
                if group.len() < 2 {
                    // Odd one out: nothing to merge it with in this round.
                    merged.extend_from_slice(group);
                    continue;
                }
                let fitted = self.split_blocks_to_fit(
                    group.to_vec(),
                    input_budget,
                    current_budget,
                    kind,
                    true,
                )?;
                for body in fitted {
                    let prompt = self.build_prompt(kind, true, &body, current_budget);
                    let (summary, usage) = self
                        .invoke_summary_prompt(&prompt, current_budget, Self::temperature_for(kind))
                        .await?;
                    Self::accumulate_usage(total_usage, has_usage, usage);
                    merged.push(summary);
                }
            }
            summaries = merged;
        }
        match summaries.pop() {
            Some(summary) => Ok(summary),
            None => Err(AgentError::Internal(
                "summary merge produced no output".into(),
            )),
        }
    }
    /// Sends `prompt` to the configured model as a single user message and returns its reply.
    ///
    /// `temperature` and `max_summary_tokens` are forwarded to `call_with_params`. The second
    /// tuple element is whatever `TokenUsage::from_remote` makes of the input/output token
    /// counts on the response.
    ///
    /// # Errors
    /// Any client failure is mapped to `AgentError::OpenAi` carrying the error text.
    async fn invoke_summary_prompt(
        &self,
        prompt: &str,
        max_summary_tokens: usize,
        temperature: f32,
    ) -> Result<(String, Option<TokenUsage>), AgentError> {
        let request = [ChatRequestMessage::user(prompt.to_string())];
        let completion = call_with_params(
            &request,
            &self.model,
            &self.ai_client_config,
            temperature,
            max_summary_tokens as u32,
            None,
            None,
            None,
        )
        .await
        .map_err(|e| AgentError::OpenAi(e.to_string()))?;
        let reported_usage = TokenUsage::from_remote(
            completion.input_tokens as u32,
            completion.output_tokens as u32,
        );
        Ok((completion.content, reported_usage))
    }
    /// Groups `blocks` into prompt bodies that each fit within `input_budget`.
    ///
    /// Convenience wrapper around `collect_fitting_chunks` that owns the output vector.
    /// The returned bodies are in the same order as the input blocks.
    fn split_blocks_to_fit(
        &self,
        blocks: Vec<String>,
        input_budget: usize,
        max_summary_tokens: usize,
        kind: SummaryKind,
        is_merge: bool,
    ) -> Result<Vec<String>, AgentError> {
        let mut fitted = Vec::new();
        self.collect_fitting_chunks(
            blocks,
            input_budget,
            max_summary_tokens,
            kind,
            is_merge,
            &mut fitted,
        )
        .map(|()| fitted)
    }
    /// Recursively halves `blocks` until every resulting prompt fits `input_budget`.
    ///
    /// The blocks are joined into one body; if the full prompt built around it is estimated
    /// to fit, the body is appended to `chunks`. Otherwise the block list is cut in the
    /// middle and both halves are processed in order. A single block that is still too large
    /// is cut in half as text.
    ///
    /// # Errors
    /// Returns `AgentError::Internal` when an empty block list does not fit, or when a single
    /// block cannot be split any further.
    fn collect_fitting_chunks(
        &self,
        blocks: Vec<String>,
        input_budget: usize,
        max_summary_tokens: usize,
        kind: SummaryKind,
        is_merge: bool,
        chunks: &mut Vec<String>,
    ) -> Result<(), AgentError> {
        let body = Self::join_blocks(&blocks, is_merge);
        let prompt_tokens =
            self.estimate_tokens(&self.build_prompt(kind, is_merge, &body, max_summary_tokens));
        if prompt_tokens <= input_budget {
            chunks.push(body);
            return Ok(());
        }
        let mut first_half = blocks;
        let second_half = match first_half.len() {
            0 => {
                return Err(AgentError::Internal(
                    "cannot split empty summary block".into(),
                ));
            }
            1 => {
                // Only one block left: fall back to splitting its text.
                let (left, right) = Self::split_text_in_half(&first_half[0])?;
                first_half = vec![left];
                vec![right]
            }
            len => first_half.split_off(len / 2),
        };
        for half in [first_half, second_half] {
            self.collect_fitting_chunks(
                half,
                input_budget,
                max_summary_tokens,
                kind,
                is_merge,
                chunks,
            )?;
        }
        Ok(())
    }
    /// Renders the instruction prompt for one summary request.
    ///
    /// The template is chosen by `kind` and `is_merge`: non-merge templates summarise raw
    /// messages, merge templates combine partial summaries. `max_summary_tokens` is stated in
    /// the prompt as the length limit and `body` is appended as the material to summarise.
    fn build_prompt(
        &self,
        kind: SummaryKind,
        is_merge: bool,
        body: &str,
        max_summary_tokens: usize,
    ) -> String {
        match (kind, is_merge) {
            (SummaryKind::Conversation, false) => format!(
                "Summarise the following conversation concisely, preserving all key facts, \
                 decisions, and any pending or in-progress work. \
                 The summary MUST NOT exceed {} tokens. \
                 Use this format:\n\n\
                 **Summary:** <one-paragraph overview>\n\
                 **Key decisions:** <bullet list or 'none'>\n\
                 **Open items:** <bullet list or 'none'>\n\n\
                 Conversation:\n\n{}",
                max_summary_tokens, body
            ),
            (SummaryKind::Conversation, true) => format!(
                "Merge the following partial conversation summaries into a single concise summary. \
                 Deduplicate overlap, preserve chronology, and keep all concrete decisions, \
                 status updates, and unresolved work. The summary MUST NOT exceed {} tokens. \
                 Use this format:\n\n\
                 **Summary:** <one-paragraph overview>\n\
                 **Key decisions:** <bullet list or 'none'>\n\
                 **Open items:** <bullet list or 'none'>\n\n\
                 Partial summaries:\n\n{}",
                max_summary_tokens, body
            ),
            (SummaryKind::RoomIncrement, false) => format!(
                "Create an incremental room summary from the new messages below. \
                 Deduplicate repeated messages, clean noise, keep chronological order, and preserve \
                 decisions, facts, assignments/owners, unresolved questions, and concrete next steps. \
                 The result MUST NOT exceed {} tokens.\n\n\
                 Format:\n\
                 **Summary:** <compact overview>\n\
                 **Decisions:** <bullets or 'none'>\n\
                 **Owners:** <bullets with owner -> task or 'none'>\n\
                 **Open items:** <bullets or 'none'>\n\n\
                 New messages:\n\n{}",
                max_summary_tokens, body
            ),
            (SummaryKind::RoomIncrement, true) => format!(
                "Merge the following partial room summaries into one room summary. Deduplicate overlap, \
                 keep chronology, preserve decisions, facts, assignments/owners, unresolved questions, \
                 and concrete next steps. The result MUST NOT exceed {} tokens.\n\n\
                 Format:\n\
                 **Summary:** <compact overview>\n\
                 **Decisions:** <bullets or 'none'>\n\
                 **Owners:** <bullets with owner -> task or 'none'>\n\
                 **Open items:** <bullets or 'none'>\n\n\
                 Partial summaries:\n\n{}",
                max_summary_tokens, body
            ),
        }
    }
    /// Concatenates `blocks` into one prompt body.
    ///
    /// For merges each block gets a `### Partial Summary N` heading (numbered from 1) and
    /// blocks are separated by a blank line; otherwise blocks are joined by single newlines.
    fn join_blocks(blocks: &[String], is_merge: bool) -> String {
        if !is_merge {
            return blocks.join("\n");
        }
        let mut joined = String::new();
        for (position, block) in blocks.iter().enumerate() {
            if position > 0 {
                joined.push_str("\n\n");
            }
            joined.push_str(&format!("### Partial Summary {}\n{}", position + 1, block));
        }
        joined
    }
    /// Cuts `text` into two non-empty pieces at a char boundary close to its byte midpoint.
    ///
    /// The boundary at or before the midpoint is preferred; if that would leave one side
    /// empty, the boundary at or after the midpoint is tried instead.
    ///
    /// # Errors
    /// Returns `AgentError::Internal` when `text` has fewer than two characters, or when
    /// neither candidate boundary lies strictly inside the text.
    fn split_text_in_half(text: &str) -> Result<(String, String), AgentError> {
        let mut leading = text.chars();
        let has_two_chars = leading.next().is_some() && leading.next().is_some();
        if !has_two_chars {
            return Err(AgentError::Internal(
                "single summary block exceeds input budget and cannot be split".into(),
            ));
        }
        let byte_len = text.len();
        let midpoint = byte_len / 2;
        // A usable cut point leaves at least one byte on each side.
        let is_interior = |at: usize| at != 0 && at < byte_len;
        let mut cut = text.floor_char_boundary(midpoint);
        if !is_interior(cut) {
            cut = text.ceil_char_boundary(midpoint);
        }
        if !is_interior(cut) {
            return Err(AgentError::Internal(
                "failed to split oversized summary block".into(),
            ));
        }
        let (left, right) = text.split_at(cut);
        Ok((left.to_owned(), right.to_owned()))
    }
    /// Estimates how many tokens `text` occupies for the configured model.
    ///
    /// Uses `count_message_text`; if that fails, falls back to one token per four bytes,
    /// with a minimum of one.
    fn estimate_tokens(&self, text: &str) -> usize {
        match count_message_text(text, &self.model) {
            Ok(tokens) => tokens,
            Err(_) => std::cmp::max(text.len() / 4, 1),
        }
    }
    /// Input token budget for this service, derived from its `model_context_limit`
    /// via `safe_model_input_budget_from_limit`.
    fn safe_model_input_budget(&self) -> usize {
        Self::safe_model_input_budget_from_limit(self.model_context_limit)
    }
    /// Limits the requested summary size to the range
    /// `CompactConfig::MIN_SUMMARY_TOKENS..=CompactConfig::MAX_SUMMARY_TOKENS`.
    fn final_summary_budget(max_summary_tokens: usize) -> usize {
        let at_least_min = max_summary_tokens.max(CompactConfig::MIN_SUMMARY_TOKENS);
        at_least_min.min(CompactConfig::MAX_SUMMARY_TOKENS)
    }
    /// Token budget for an intermediate summary.
    ///
    /// This is `final_budget`, capped at one eighth of `input_budget`; the cap itself never
    /// drops below `MIN_ROUND_SUMMARY_TOKENS`.
    fn round_summary_budget(final_budget: usize, input_budget: usize) -> usize {
        let input_cap = std::cmp::max(input_budget / 8, MIN_ROUND_SUMMARY_TOKENS);
        std::cmp::min(final_budget, input_cap)
    }
    /// Sampling temperature used for summary requests of the given kind.
    fn temperature_for(kind: SummaryKind) -> f32 {
        match kind {
            SummaryKind::Conversation => 0.3,
            SummaryKind::RoomIncrement => 0.2,
        }
    }
    /// Computes the prompt input budget as 85% of the model context limit.
    ///
    /// A missing limit falls back to `DEFAULT_MODEL_CONTEXT_LIMIT`. Both the limit and the
    /// result are kept at 1 or above.
    fn safe_model_input_budget_from_limit(model_context_limit: Option<usize>) -> usize {
        let context_limit = match model_context_limit {
            Some(limit) => limit,
            None => DEFAULT_MODEL_CONTEXT_LIMIT,
        };
        let scaled = std::cmp::max(context_limit, 1)
            .saturating_mul(MODEL_INPUT_RATIO_NUMERATOR)
            / MODEL_INPUT_RATIO_DENOMINATOR;
        std::cmp::max(scaled, 1)
    }
    /// Adds `usage` to the running `total` and sets `has_usage`; does nothing for `None`.
    fn accumulate_usage(total: &mut TokenUsage, has_usage: &mut bool, usage: Option<TokenUsage>) {
        match usage {
            None => {}
            Some(reported) => {
                total.input_tokens += reported.input_tokens;
                total.output_tokens += reported.output_tokens;
                *has_usage = true;
            }
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::super::CompactService;
    /// The usable input budget is 85% of the model context limit.
    #[test]
    fn room_summary_uses_eighty_five_percent_input_budget() {
        let budget = CompactService::safe_model_input_budget_from_limit(Some(1000));
        assert_eq!(budget, 850);
    }
    /// Splitting must be lossless and leave both halves non-empty.
    #[test]
    fn oversized_text_is_split_in_half() {
        let original = "abcdefgh";
        let (left, right) = CompactService::split_text_in_half(original).unwrap();
        assert!(!left.is_empty());
        assert!(!right.is_empty());
        assert_eq!([left, right].concat(), original);
    }
 }
--- a/libs/agent/compact/types.rs
+++ b/libs/agent/compact/types.rs
@ -1,209 +0,0 @@
 use chrono::{DateTime, Utc};
 use models::rooms::{
    MessageContentType, MessageSenderType, room_message::Model as RoomMessageModel,
 };
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use uuid::Uuid;
 use crate::tokent::TokenUsage;
 /// Serializable snapshot of one room message together with a display name for its sender.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct MessageSummary {
    /// Id of the source message.
    pub id: Uuid,
    /// Sender type copied from the source message.
    pub sender_type: MessageSenderType,
    /// Sender id copied from the source message, if it has one.
    pub sender_id: Option<Uuid>,
    /// Display name for the sender: a username, or the sender type's display string.
    pub sender_name: String,
    /// Message content copied from the source message.
    pub content: String,
    /// Content type copied from the source message.
    pub content_type: MessageContentType,
    /// Tool call ID extracted from message content JSON, if present.
    pub tool_call_id: Option<String>,
    /// Send timestamp copied from the source message.
    pub send_at: DateTime<Utc>,
 }
 /// Result of compacting a session's messages: a summary plus the messages kept verbatim.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CompactSummary {
    /// Session the compaction was run for.
    pub session_id: Uuid,
    /// Room the compaction belongs to.
    pub room_id: Uuid,
    /// Messages that were kept as-is rather than summarised.
    pub retained: Vec<MessageSummary>,
    /// Summary text; empty when nothing was summarised.
    pub summary: String,
    /// Time at which this result was produced.
    pub compacted_at: DateTime<Utc>,
    /// Number of messages that were folded into `summary`.
    pub messages_compressed: usize,
    /// Token usage for the compaction AI call. `None` if usage data was unavailable.
    pub usage: Option<TokenUsage>,
 }
 /// A stored room summary together with the range of messages it was built from.
 // NOTE(review): field meanings below are read off the names only; this chunk does not
 // show where the record is produced — confirm against the writer.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RoomCompactRecord {
    pub id: Uuid,
    pub room_id: Uuid,
    /// Presumably the first message sequence number covered — TODO confirm inclusivity.
    pub from_seq: i64,
    /// Presumably the last message sequence number covered — TODO confirm inclusivity.
    pub to_seq: i64,
    pub summary: String,
    pub message_count: i32,
    pub source_message_ids: Vec<Uuid>,
    pub created_at: DateTime<Utc>,
 }
 /// Room context after an optional compaction: a summary plus the messages kept verbatim.
 // NOTE(review): field meanings below are read off the names only; the code that fills
 // this struct is not visible in this chunk — confirm against the producer.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RoomCompactContext {
    pub room_id: Uuid,
    /// Presumably the sequence number up to which messages are covered by `summary`.
    pub cutoff_seq: Option<i64>,
    pub summary: Option<String>,
    pub retained: Vec<MessageSummary>,
    pub estimated_tokens: usize,
    /// Presumably whether a compaction actually took place — TODO confirm.
    pub compacted: bool,
 }
 /// How aggressively history is compacted; decides how many trailing messages are kept as-is.
 #[derive(Debug, Clone, Copy)]
 pub enum CompactLevel {
    /// Keeps the last 5 messages unsummarised.
    Light,
    /// Keeps the last 2 messages unsummarised.
    Aggressive,
 }
 impl CompactLevel {
    /// Number of trailing messages left unsummarised at this level.
    pub fn retain_count(&self) -> usize {
        match self {
            CompactLevel::Light => 5,
            CompactLevel::Aggressive => 2,
        }
    }
    /// Auto-select level based on estimated token count and config.
    ///
    /// - `Light` (retain 5): when tokens are moderately over threshold
    /// - `Aggressive` (retain 2): when tokens are severely over threshold (2x+)
    ///
    /// A `threshold` of 0 always selects `Light`.
    pub fn auto_select(estimated_tokens: usize, threshold: usize) -> Self {
        if threshold == 0 {
            return CompactLevel::Light;
        }
        // For integers `estimated_tokens / 2 >= threshold` holds exactly when
        // `estimated_tokens >= threshold * 2`, but it cannot overflow for large thresholds.
        if estimated_tokens / 2 >= threshold {
            CompactLevel::Aggressive
        } else {
            CompactLevel::Light
        }
    }
 }
 /// Configuration for automatic compaction.
 #[derive(Debug, Clone, Copy)]
 pub struct CompactConfig {
    /// Only trigger compaction when estimated token count exceeds this.
    /// Set to 0 to disable threshold (always compact when messages > retain_count).
    pub token_threshold: usize,
    /// If true, auto-select level based on how far over the threshold we are.
    /// If false, always use `default_level`.
    pub auto_level: bool,
    /// Fallback level when `auto_level` is false.
    pub default_level: CompactLevel,
    /// Maximum tokens the summary may contain (enforced via prompt).
    pub max_summary_tokens: usize,
 }
 /// Defaults: a 100 000-token threshold, automatic level selection with `Light` as the
 /// fallback level, and a 4096-token summary limit.
 impl Default for CompactConfig {
    fn default() -> Self {
        Self {
            token_threshold: 100_000,
            auto_level: true,
            default_level: CompactLevel::Light,
            max_summary_tokens: 4096,
        }
    }
 }
 impl CompactConfig {
    /// Smallest summary budget, in tokens, that `summary_token_budget` returns.
    pub const MIN_SUMMARY_TOKENS: usize = 256;
    /// Largest summary budget, in tokens, that `summary_token_budget` returns.
    pub const MAX_SUMMARY_TOKENS: usize = 4096;
    /// Builds a config from project context settings.
    ///
    /// The token threshold is `context_window_tokens * compaction_threshold`, with negative
    /// inputs treated as zero. The summary limit comes from [`Self::summary_token_budget`].
    /// Level selection is automatic with `Light` as the fallback.
    pub fn from_project_setting(
        context_window_tokens: i32,
        compaction_threshold: f32,
        compaction_max_summary_ratio: f32,
    ) -> Self {
        let window = if context_window_tokens > 0 {
            context_window_tokens as usize
        } else {
            0
        };
        let threshold_fraction = compaction_threshold.max(0.0);
        let token_threshold = (window as f32 * threshold_fraction) as usize;
        let max_summary_tokens = Self::summary_token_budget(window, compaction_max_summary_ratio);
        Self {
            token_threshold,
            auto_level: true,
            default_level: CompactLevel::Light,
            max_summary_tokens,
        }
    }
    /// Summary budget for a context window: `context_window_tokens * ratio`, kept within
    /// `MIN_SUMMARY_TOKENS..=MAX_SUMMARY_TOKENS`. A negative ratio counts as zero.
    pub fn summary_token_budget(
        context_window_tokens: usize,
        compaction_max_summary_ratio: f32,
    ) -> usize {
        let ratio = compaction_max_summary_ratio.max(0.0);
        let raw_budget = (context_window_tokens as f32 * ratio) as usize;
        // Raising to the minimum also covers a raw budget of zero.
        raw_budget
            .max(Self::MIN_SUMMARY_TOKENS)
            .min(Self::MAX_SUMMARY_TOKENS)
    }
 }
 /// Result of a threshold check before deciding whether to compact.
 #[derive(Debug)]
 pub enum ThresholdResult {
    /// Token count is below threshold — skip compaction.
    Skip { estimated_tokens: usize },
    /// Token count exceeds threshold — compact with this level.
    Compact {
        /// Token estimate that triggered the compaction.
        estimated_tokens: usize,
        /// Compaction level to apply.
        level: CompactLevel,
    },
 }
 /// Converts a stored room message into a [`MessageSummary`].
 ///
 /// `sender_name` is the sender type's display string (no username lookup happens here).
 /// `tool_call_id` is taken from the message content when that content is JSON with a
 /// string `tool_call_id` field.
 impl From<RoomMessageModel> for MessageSummary {
    fn from(m: RoomMessageModel) -> Self {
        // Compute everything that needs to read `m` before its fields are moved out;
        // the message is owned, so no field has to be cloned.
        let tool_call_id = Self::extract_tool_call_id(&m.content);
        let sender_name = m.sender_type.to_string();
        Self {
            id: m.id,
            sender_type: m.sender_type,
            sender_id: m.sender_id,
            sender_name,
            content: m.content,
            content_type: m.content_type,
            tool_call_id,
            send_at: m.send_at,
        }
    }
 }
 impl MessageSummary {
    /// Returns the string value of the top-level `tool_call_id` field when `content`
    /// (ignoring surrounding whitespace) parses as JSON; otherwise `None`.
    fn extract_tool_call_id(content: &str) -> Option<String> {
        let parsed: Value = serde_json::from_str(content.trim()).ok()?;
        let id = parsed.get("tool_call_id")?.as_str()?;
        Some(id.to_owned())
    }
 }
 #[cfg(test)]
 mod tests {
    use super::CompactConfig;
    /// Budgets that would fall below the floor are raised to 256 tokens.
    #[test]
    fn summary_budget_has_minimum_floor() {
        let cases = [(0usize, 0.0f32), (128_000, 0.0), (1_000, 0.01)];
        for (window, ratio) in cases {
            assert_eq!(CompactConfig::summary_token_budget(window, ratio), 256);
        }
    }
    /// Budgets above the ceiling are reduced to 4096 tokens.
    #[test]
    fn summary_budget_is_capped() {
        let budget = CompactConfig::summary_token_budget(128_000, 0.2);
        assert_eq!(budget, 4096);
    }
 }
--- a/libs/agent/embed/chunk.rs
+++ b/libs/agent/embed/chunk.rs
@ -1,63 +0,0 @@
 /// Upper bound on the number of characters in one embedding chunk.
 ///
 /// Chosen as a conservative stand-in for the 8192-token limit of text-embedding-3-small:
 /// roughly 1 char/token for CJK and 4 chars/token for English.
 const MAX_CHUNK_CHARS: usize = 7000;
 /// Splits `text` into chunks of at most `MAX_CHUNK_CHARS` characters.
 ///
 /// Text of at most `MAX_CHUNK_CHARS` bytes (including the empty string) is returned as a
 /// single chunk. Longer text is cut window by window: within each window of
 /// `MAX_CHUNK_CHARS` characters the cut is placed after the last blank line, else after the
 /// last newline, else after the last `.`, `!` or `?` that is followed by a space, else at the
 /// end of the window. Cuts always fall on char boundaries, and concatenating the chunks
 /// gives back `text`.
 pub fn chunk_text(text: &str) -> Vec<String> {
    // Also covers the empty string, which yields one empty chunk.
    if text.len() <= MAX_CHUNK_CHARS {
        return vec![text.to_owned()];
    }
    // Break patterns in priority order, with the number of pattern bytes that stay in the
    // chunk being closed.
    const BREAKS: [(&str, usize); 5] = [("\n\n", 2), ("\n", 1), (". ", 1), ("! ", 1), ("? ", 1)];
    let mut pieces = Vec::new();
    let mut rest = text;
    loop {
        // Byte offset just past the first `MAX_CHUNK_CHARS` characters of `rest`, if `rest`
        // is longer than that.
        let window_end = match rest.char_indices().nth(MAX_CHUNK_CHARS) {
            Some((offset, _)) => offset,
            None => {
                pieces.push(rest.to_owned());
                break;
            }
        };
        let window = &rest[..window_end];
        let cut = BREAKS
            .iter()
            .find_map(|(pattern, keep)| window.rfind(pattern).map(|pos| pos + keep))
            .unwrap_or(window_end);
        let (piece, remainder) = rest.split_at(cut);
        pieces.push(piece.to_owned());
        rest = remainder;
    }
    pieces
 }
--- a/libs/agent/embed/client.rs
+++ b/libs/agent/embed/client.rs
@ -1,291 +0,0 @@
 use rig::client::EmbeddingsClient;
 use rig::embeddings::EmbeddingModel;
 use rig::providers::openai::Client as OpenAiClient;
 use serde::{Deserialize, Serialize};
 use crate::embed::qdrant::QdrantClient;
 /// Pairs an OpenAI client (used to compute embeddings) with a Qdrant client (used to
 /// store and search the resulting vectors).
 pub struct EmbedClient {
    openai: OpenAiClient,
    qdrant: QdrantClient,
 }
 /// A point to be written to Qdrant: id, embedding vector and payload.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct EmbedVector {
    pub id: String,
    pub vector: Vec<f32>,
    pub payload: EmbedPayload,
 }
 /// Payload stored alongside a vector.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct EmbedPayload {
    /// Kind of entity, e.g. `"memory"`, `"skill"` or `"issue"`.
    pub entity_type: String,
    /// Id the point is associated with (a room id for memories, a project id for skills).
    pub entity_id: String,
    /// The text that was embedded.
    pub text: String,
    /// Optional free-form JSON; omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub extra: Option<serde_json::Value>,
 }
 /// One hit returned by a vector search: point id, score and stored payload.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct SearchResult {
    pub id: String,
    pub score: f32,
    pub payload: EmbedPayload,
 }
 impl EmbedClient {
    /// Creates a client from an already configured OpenAI client and Qdrant client.
    pub fn new(openai: OpenAiClient, qdrant: QdrantClient) -> Self {
        Self { openai, qdrant }
    }
    /// Embeds a single text with the named embedding model and returns the vector as `f32`s.
    ///
    /// # Errors
    /// Returns `AgentError::OpenAi` when the request fails or no embedding comes back.
    pub async fn embed_text(&self, text: &str, model: &str) -> crate::Result<Vec<f32>> {
        let embedder = self.openai.embedding_model(model);
        let embeddings = match embedder.embed_texts(vec![text.to_string()]).await {
            Ok(embeddings) => embeddings,
            Err(e) => {
                return Err(crate::AgentError::OpenAi(format!(
                    "embedding failed: {}",
                    e
                )));
            }
        };
        match embeddings.first() {
            Some(embedding) => Ok(embedding
                .vec
                .iter()
                .map(|component| *component as f32)
                .collect()),
            None => Err(crate::AgentError::OpenAi("no embedding returned".into())),
        }
    }
    /// Embeds several texts in one provider call.
    ///
    /// The result always has exactly `texts.len()` entries, with `result[i]` belonging to
    /// `texts[i]`. If the provider returns fewer embeddings than requested, the missing
    /// entries are empty vectors; surplus embeddings are dropped. Both situations are logged
    /// as warnings rather than treated as errors. An empty `texts` slice returns an empty
    /// result without contacting the provider.
    ///
    /// # Errors
    /// Returns `AgentError::OpenAi` when the embedding request fails.
    pub async fn embed_batch(&self, texts: &[String], model: &str) -> crate::Result<Vec<Vec<f32>>> {
        // Nothing to embed: skip the provider round-trip.
        if texts.is_empty() {
            return Ok(Vec::new());
        }
        let model = self.openai.embedding_model(model);
        let embeddings = model
            .embed_texts(texts.to_vec())
            .await
            .map_err(|e| crate::AgentError::OpenAi(format!("embedding batch failed: {}", e)))?;
        tracing::debug!(
            input_count = texts.len(),
            returned_count = embeddings.len(),
            "embed_batch: API returned"
        );
        if embeddings.len() > texts.len() {
            let idx = texts.len();
            tracing::warn!(
                idx,
                "embed_batch: provider returned more embeddings than requested"
            );
        }
        let mut result: Vec<Vec<f32>> = embeddings
            .into_iter()
            .take(texts.len())
            .map(|embedding| embedding.vec.iter().map(|v| *v as f32).collect())
            .collect();
        // Pad with empty vectors so positions keep lining up with `texts`.
        result.resize_with(texts.len(), Vec::new);
        // Check for empty results
        let empty_count = result.iter().filter(|v| v.is_empty()).count();
        if empty_count > 0 {
            tracing::warn!(
                empty_count = empty_count,
                total = texts.len(),
                "embed_batch: some embeddings returned empty vectors"
            );
        }
        Ok(result)
    }
    /// Writes `points` to Qdrant via `QdrantClient::upsert_points`.
    pub async fn upsert(&self, points: Vec<EmbedVector>) -> crate::Result<()> {
        self.qdrant.upsert_points(points).await
    }
    /// Upsert points into a named collection (bypasses entity_type routing).
    ///
    /// Delegates to `QdrantClient::upsert_to_collection`.
    pub async fn upsert_to_collection(
        &self,
        collection_name: &str,
        points: Vec<EmbedVector>,
    ) -> crate::Result<()> {
        self.qdrant
            .upsert_to_collection(collection_name, points)
            .await
    }
    /// Embeds `query` with `model` and searches Qdrant for up to `limit` hits of the given
    /// `entity_type`.
    pub async fn search(
        &self,
        query: &str,
        entity_type: &str,
        model: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        let vector = self.embed_text(query, model).await?;
        self.qdrant.search(&vector, entity_type, limit).await
    }
    /// Like [`Self::search`], but additionally passes a Qdrant `filter` to the search.
    pub async fn search_with_filter(
        &self,
        query: &str,
        entity_type: &str,
        model: &str,
        limit: usize,
        filter: qdrant_client::qdrant::Filter,
    ) -> crate::Result<Vec<SearchResult>> {
        let vector = self.embed_text(query, model).await?;
        self.qdrant
            .search_with_filter(&vector, entity_type, limit, filter)
            .await
    }
    /// Deletes stored points for `entity_id` of the given `entity_type` by delegating to
    /// `QdrantClient::delete_by_filter`.
    pub async fn delete_by_entity_id(
        &self,
        entity_type: &str,
        entity_id: &str,
    ) -> crate::Result<()> {
        self.qdrant.delete_by_filter(entity_type, entity_id).await
    }
    /// Ensures the collection for `entity_type` exists with the given vector `dimensions`
    /// (delegates to `QdrantClient::ensure_collection`).
    pub async fn ensure_collection(&self, entity_type: &str, dimensions: u64) -> crate::Result<()> {
        self.qdrant.ensure_collection(entity_type, dimensions).await
    }
    /// Ensures the skill collection exists with the given vector `dimensions`
    /// (delegates to `QdrantClient::ensure_skill_collection`).
    pub async fn ensure_skill_collection(&self, dimensions: u64) -> crate::Result<()> {
        self.qdrant.ensure_skill_collection(dimensions).await
    }
    /// Ensure a room-specific memory collection exists.
    ///
    /// Delegates to `QdrantClient::ensure_room_memory_collection` with the project name,
    /// room id and vector `dimensions`.
    pub async fn ensure_room_memory_collection(
        &self,
        project_name: &str,
        room_id: &str,
        dimensions: u64,
    ) -> crate::Result<()> {
        self.qdrant
            .ensure_room_memory_collection(project_name, room_id, dimensions)
            .await
    }
    /// Embeds a conversation message and stores it in the room's memory collection.
    ///
    /// The collection name comes from `QdrantClient::room_memory_collection_name` and the
    /// collection is created on demand, sized to the embedding that was just computed.
    /// The stored payload has entity type `"memory"`, the room id as entity id, the original
    /// text, and `extra = { "user_id": ... }`.
    // NOTE(review): no `seq` is written into `extra` here, while
    // `search_memories_after_seq` keeps only hits that carry one — confirm points stored
    // through this method are meant to be excluded from seq-filtered searches.
    pub async fn embed_memory(
        &self,
        id: &str,
        text: &str,
        project_name: &str,
        room_id: &str,
        user_id: Option<&str>,
        model: &str,
    ) -> crate::Result<()> {
        // The embedding is computed first because its length decides the collection size.
        let vector = self.embed_text(text, model).await?;
        let dimensions = vector.len() as u64;
        self.qdrant
            .ensure_room_memory_collection(project_name, room_id, dimensions)
            .await?;
        let payload = EmbedPayload {
            entity_type: "memory".to_string(),
            entity_id: room_id.to_string(),
            text: text.to_string(),
            extra: Some(serde_json::json!({ "user_id": user_id })),
        };
        let point = EmbedVector {
            id: id.to_string(),
            vector,
            payload,
        };
        let collection =
            crate::embed::qdrant::QdrantClient::room_memory_collection_name(project_name, room_id);
        self.qdrant
            .upsert_to_collection(&collection, vec![point])
            .await
    }
    /// Searches a room's memory collection for the entries most similar to `query`.
    ///
    /// The query is embedded with `model`, the room collection is created if missing (using
    /// `dimensions`), and up to `limit` hits from that collection are returned.
    pub async fn search_memories(
        &self,
        query: &str,
        model: &str,
        project_name: &str,
        room_id: &str,
        limit: usize,
        dimensions: u64,
    ) -> crate::Result<Vec<SearchResult>> {
        let query_vector = self.embed_text(query, model).await?;
        // Make sure the collection exists before searching it.
        self.qdrant
            .ensure_room_memory_collection(project_name, room_id, dimensions)
            .await?;
        let collection =
            crate::embed::qdrant::QdrantClient::room_memory_collection_name(project_name, room_id);
        self.qdrant
            .search_collection(&collection, &query_vector, limit)
            .await
    }
    /// Searches room memories, optionally keeping only hits newer than a sequence number.
    ///
    /// With `after_seq = Some(cutoff)` four times `limit` hits are fetched, and only those
    /// whose payload `extra.seq` is an integer greater than `cutoff` are kept; hits without
    /// such a field are dropped. The result is cut to at most `limit` entries either way.
    pub async fn search_memories_after_seq(
        &self,
        query: &str,
        model: &str,
        project_name: &str,
        room_id: &str,
        limit: usize,
        dimensions: u64,
        after_seq: Option<i64>,
    ) -> crate::Result<Vec<SearchResult>> {
        // Over-fetch when filtering so that enough hits survive the cutoff.
        let fetch_limit = match after_seq {
            Some(_) => limit.saturating_mul(4),
            None => limit,
        };
        let hits = self
            .search_memories(query, model, project_name, room_id, fetch_limit, dimensions)
            .await?;
        let mut kept: Vec<SearchResult> = match after_seq {
            None => hits,
            Some(cutoff) => hits
                .into_iter()
                .filter(|hit| {
                    let seq = hit
                        .payload
                        .extra
                        .as_ref()
                        .and_then(|extra| extra.get("seq"))
                        .and_then(|value| value.as_i64());
                    matches!(seq, Some(seq) if seq > cutoff)
                })
                .collect(),
        };
        kept.truncate(limit);
        Ok(kept)
    }
    /// Embeds a skill and upserts it into Qdrant.
    ///
    /// The embedded text is `"{name}: {description} {content}"`. The payload has entity type
    /// `"skill"`, the project id as entity id, that text, and `extra` holding the name and
    /// description.
    pub async fn embed_skill(
        &self,
        id: &str,
        name: &str,
        description: &str,
        content: &str,
        project_uuid: &str,
        model: &str,
    ) -> crate::Result<()> {
        let text = format!("{}: {} {}", name, description, content);
        let vector = self.embed_text(&text, model).await?;
        let payload = EmbedPayload {
            entity_type: "skill".to_string(),
            entity_id: project_uuid.to_string(),
            text,
            extra: Some(serde_json::json!({ "name": name, "description": description })),
        };
        let point = EmbedVector {
            id: id.to_string(),
            vector,
            payload,
        };
        self.qdrant.upsert_points(vec![point]).await
    }
    /// Search skill embeddings by semantic similarity within a project.
    ///
    /// Hits are filtered to `project_uuid` only after the search, so more than `limit` hits
    /// are fetched (four times `limit`, at least `limit + 1`) to reduce the chance that
    /// skills of other projects crowd out this project's results. At most `limit` hits are
    /// returned.
    // NOTE(review): over-fetching is a heuristic; filtering by project inside the Qdrant
    // query would be exact, but `search_skill` as called here takes no filter.
    pub async fn search_skills(
        &self,
        query: &str,
        model: &str,
        project_uuid: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        let vector = self.embed_text(query, model).await?;
        // Saturating arithmetic keeps very large limits from overflowing.
        let fetch_limit = limit.saturating_mul(4).max(limit.saturating_add(1));
        let mut results = self.qdrant.search_skill(&vector, fetch_limit).await?;
        results.retain(|r| r.payload.entity_id == project_uuid);
        results.truncate(limit);
        Ok(results)
    }
 }
--- a/libs/agent/embed/embeddable.rs
+++ b/libs/agent/embed/embeddable.rs
@ -1,24 +0,0 @@
 use async_trait::async_trait;
 /// Trait for entities that can be embedded as vectors into Qdrant.
 ///
 /// Consumed by `EmbedService::embed_issues`, which builds one point per item
 /// from the three accessors below.
 #[async_trait]
 pub trait Embeddable {
    /// Entity type label stored in the point payload (`entity_type`).
    fn entity_type(&self) -> &'static str;
    /// Text that is sent to the embedding model for this entity.
    fn to_text(&self) -> String;
    /// Identifier used both as the point id and as the payload `entity_id`.
    fn entity_id(&self) -> String;
 }
 /// Input struct for batch memory embedding into per-room Qdrant collections.
 #[derive(Debug, Clone)]
 pub struct EmbedMemoryInput {
    /// Message identifier; used as the point id (with a `:chunk:{i}` suffix
    /// when the content is split into several chunks).
    pub message_id: String,
    /// Sequence number stored in the payload `extra` as `seq`.
    pub seq: i64,
    /// Message text; chunked before embedding.
    pub content: String,
    /// Passed to the room-memory collection name helper together with `room_id`.
    pub project_name: String,
    /// Room identifier; selects the collection and is stored as payload `entity_id`.
    pub room_id: String,
    /// Optional user identifier stored in the payload `extra` as `user_id`.
    pub user_id: Option<String>,
    /// Sender kind stored in the payload `extra` as `sender_type`.
    pub sender_type: String,
 }
 /// Input struct for batch tag embedding.
 pub use models::TagEmbedInput;
--- a/libs/agent/embed/entity_embed.rs
+++ b/libs/agent/embed/entity_embed.rs
@ -1,369 +0,0 @@
 use std::collections::HashMap;
 use super::chunk::chunk_text;
 use super::client::{EmbedPayload, EmbedVector};
 use super::embeddable::{EmbedMemoryInput, Embeddable};
 /// Embedding and upsert operations for entity vectors in Qdrant.
 impl super::EmbedService {
    /// Embeds a single issue and upserts the resulting point via the embed client.
    ///
    /// The embedded text is the title, followed by a blank line and the body
    /// when a non-empty body is supplied. The point id and payload
    /// `entity_id` are both `id`; the payload `entity_type` is `"issue"`.
    ///
    /// # Errors
    /// Propagates failures from the embedding call and from the upsert.
    pub async fn embed_issue(
        &self,
        id: &str,
        title: &str,
        body: Option<&str>,
    ) -> crate::Result<()> {
        let text = if let Some(b) = body.filter(|b| !b.is_empty()) {
            format!("{}\n\n{}", title, b)
        } else {
            title.to_string()
        };
        tracing::debug!(issue_id = %id, text_len = text.len(), "embed_issue: calling embedding API");
        let vector = self.client.embed_text(&text, &self.model_name).await?;
        tracing::debug!(issue_id = %id, vec_dim = vector.len(), "embed_issue: embedding done");
        let payload = EmbedPayload {
            entity_type: String::from("issue"),
            entity_id: String::from(id),
            text,
            extra: None,
        };
        let issue_point = EmbedVector {
            id: String::from(id),
            vector,
            payload,
        };
        self.client.upsert(vec![issue_point]).await?;
        tracing::info!(issue_id = %id, "embed_issue: upsert complete");
        Ok(())
    }
    /// Embeds a single repository and upserts the resulting point via the embed client.
    ///
    /// The embedded text is `"{name}: {description}"` when a non-empty
    /// description is supplied, otherwise just the name. The point id and
    /// payload `entity_id` are both `id`; the payload `entity_type` is `"repo"`.
    ///
    /// # Errors
    /// Propagates failures from the embedding call and from the upsert.
    pub async fn embed_repo(
        &self,
        id: &str,
        name: &str,
        description: Option<&str>,
    ) -> crate::Result<()> {
        let text = if let Some(d) = description.filter(|d| !d.is_empty()) {
            format!("{}: {}", name, d)
        } else {
            name.to_string()
        };
        tracing::debug!(repo_id = %id, text_len = text.len(), "embed_repo: calling embedding API");
        let vector = self.client.embed_text(&text, &self.model_name).await?;
        tracing::debug!(repo_id = %id, vec_dim = vector.len(), "embed_repo: embedding done");
        let payload = EmbedPayload {
            entity_type: String::from("repo"),
            entity_id: String::from(id),
            text,
            extra: None,
        };
        let repo_point = EmbedVector {
            id: String::from(id),
            vector,
            payload,
        };
        self.client.upsert(vec![repo_point]).await?;
        tracing::info!(repo_id = %id, "embed_repo: upsert complete");
        Ok(())
    }
    pub async fn embed_issues<T: Embeddable + Send + Sync>(
        &self,
        items: Vec<T>,
    ) -> crate::Result<()> {
        if items.is_empty() {
            return Ok(());
        }
        let texts: Vec<String> = items.iter().map(|i| i.to_text()).collect();
        tracing::debug!(count = texts.len(), "embed_issues: calling embed_batch");
        let embeddings = self.client.embed_batch(&texts, &self.model_name).await?;
        tracing::debug!(count = embeddings.len(), "embed_issues: batch done");
        let points: Vec<EmbedVector> = items
            .into_iter()
            .zip(embeddings.into_iter())
            .map(|(item, vector)| EmbedVector {
                id: item.entity_id(),
                vector,
                payload: EmbedPayload {
                    entity_type: item.entity_type().to_string(),
                    entity_id: item.entity_id(),
                    text: item.to_text(),
                    extra: None,
                },
            })
            .collect();
        let count = points.len();
        self.client.upsert(points).await?;
        tracing::info!(count = count, "embed_issues: upsert complete");
        Ok(())
    }
    pub async fn embed_skill(
        &self,
        skill_id: i64,
        name: &str,
        description: Option<&str>,
        content: &str,
        project_uuid: &str,
    ) -> crate::Result<()> {
        let desc = description.unwrap_or_default();
        let id = skill_id.to_string();
        tracing::debug!(skill_id = %skill_id, name = %name, content_len = content.len(), "embed_skill: starting");
        let texts = chunk_text(content);
        tracing::debug!(skill_id = %skill_id, chunks = texts.len(), "embed_skill: chunked");
        if texts.len() == 1 {
            self.client
                .embed_skill(&id, name, desc, content, project_uuid, &self.model_name)
                .await?;
        } else {
            let full_texts: Vec<String> = texts
                .iter()
                .map(|t| format!("{}: {} {}", name, desc, t))
                .collect();
            tracing::debug!(skill_id = %skill_id, "embed_skill: calling embed_batch");
            let embeddings = self
                .client
                .embed_batch(&full_texts, &self.model_name)
                .await?;
            let points: Vec<EmbedVector> = embeddings
                .into_iter()
                .enumerate()
                .map(|(i, vector)| EmbedVector {
                    id: format!("{}:chunk:{}", id, i),
                    vector,
                    payload: EmbedPayload {
                        entity_type: "skill".to_string(),
                        entity_id: project_uuid.to_string(),
                        text: texts[i].clone(),
                        extra: serde_json::json!({
                            "name": name,
                            "description": desc,
                            "chunk_index": i,
                            "total_chunks": texts.len(),
                        })
                        .into(),
                    },
                })
                .collect();
            self.client.upsert(points).await?;
        }
        tracing::info!(skill_id = %skill_id, chunks = texts.len(), "embed_skill: complete");
        Ok(())
    }
    pub async fn embed_issue_chunked(
        &self,
        id: &str,
        title: &str,
        body: Option<&str>,
    ) -> crate::Result<()> {
        let text = match body {
            Some(b) if !b.is_empty() => format!("{}\n\n{}", title, b),
            _ => title.to_string(),
        };
        let chunks = chunk_text(&text);
        if chunks.len() == 1 {
            return self.embed_issue(id, title, body).await;
        }
        let embeddings = self.client.embed_batch(&chunks, &self.model_name).await?;
        let points: Vec<EmbedVector> = embeddings
            .into_iter()
            .enumerate()
            .map(|(i, vector)| EmbedVector {
                id: format!("{}:chunk:{}", id, i),
                vector,
                payload: EmbedPayload {
                    entity_type: "issue".to_string(),
                    entity_id: id.to_string(),
                    text: chunks[i].clone(),
                    extra: serde_json::json!({
                        "chunk_index": i,
                        "total_chunks": chunks.len(),
                    })
                    .into(),
                },
            })
            .collect();
        self.client.upsert(points).await
    }
    /// Embeds a batch of messages into per-room memory collections.
    ///
    /// Each message is chunked with `chunk_text`; messages whose chunks are
    /// all blank are skipped. Messages are grouped by room-memory collection
    /// name, and for every collection all chunks are embedded with a single
    /// `embed_batch` call and upserted into that collection.
    ///
    /// Point ids are the message id for single-chunk messages and
    /// `"{message_id}:chunk:{i}"` otherwise. The payload `entity_id` is the
    /// room id, and `extra` carries `message_id`, `seq`, `user_id`,
    /// `sender_type`, plus `chunk_index` / `total_chunks` (null for
    /// single-chunk messages).
    ///
    /// Storage is best-effort: a failure to ensure the collection or to upsert
    /// is logged as a warning and the remaining rooms are still processed.
    ///
    /// # Errors
    /// Only embedding failures are propagated (aborting the remaining rooms).
    pub async fn embed_memories_batch(&self, messages: Vec<EmbedMemoryInput>) -> crate::Result<()> {
        if messages.is_empty() {
            return Ok(());
        }
        let mut by_room: HashMap<String, Vec<(EmbedMemoryInput, Vec<String>)>> = HashMap::new();
        for msg in messages {
            let chunks = chunk_text(&msg.content);
            if chunks.is_empty() || chunks.iter().all(|c| c.trim().is_empty()) {
                continue;
            }
            let collection = super::qdrant::QdrantClient::room_memory_collection_name(
                &msg.project_name,
                &msg.room_id,
            );
            by_room.entry(collection).or_default().push((msg, chunks));
        }
        for (collection, entries) in &by_room {
            let all_texts: Vec<String> = entries
                .iter()
                .flat_map(|(_, chunks)| chunks.iter().cloned())
                .collect();
            if all_texts.is_empty() {
                continue;
            }
            let embeddings = self
                .client
                .embed_batch(&all_texts, &self.model_name)
                .await?;
            if embeddings.len() != all_texts.len() {
                // Previously silent: chunks without a vector are skipped below.
                tracing::warn!(
                    collection = %collection,
                    expected = all_texts.len(),
                    received = embeddings.len(),
                    "batch memory embed: embedding count mismatch"
                );
            }
            if let Some((first, _)) = entries.first() {
                // Best-effort: the upsert below reports its own failure, but a
                // failed ensure is logged so the root cause is not lost.
                if let Err(e) = self
                    .client
                    .ensure_room_memory_collection(
                        &first.project_name,
                        &first.room_id,
                        self.dimensions,
                    )
                    .await
                {
                    tracing::warn!(
                        collection = %collection,
                        error = %e,
                        "batch memory embed: ensure collection failed"
                    );
                }
            }
            let mut points = Vec::with_capacity(all_texts.len());
            // Vectors come back in the same order the chunks were flattened,
            // so they are consumed one by one instead of cloned by index.
            let mut remaining = embeddings.into_iter();
            'entries: for (msg, chunks) in entries {
                let is_chunked = chunks.len() > 1;
                for (chunk_i, chunk) in chunks.iter().enumerate() {
                    let vector = match remaining.next() {
                        Some(v) => v,
                        None => break 'entries,
                    };
                    let point_id = if is_chunked {
                        format!("{}:chunk:{}", msg.message_id, chunk_i)
                    } else {
                        msg.message_id.clone()
                    };
                    let chunk_index = if is_chunked { Some(chunk_i) } else { None };
                    let total_chunks = if is_chunked { Some(chunks.len()) } else { None };
                    points.push(EmbedVector {
                        id: point_id,
                        vector,
                        payload: EmbedPayload {
                            entity_type: "memory".to_string(),
                            entity_id: msg.room_id.clone(),
                            text: chunk.clone(),
                            extra: serde_json::json!({
                                "message_id": msg.message_id,
                                "seq": msg.seq,
                                "user_id": msg.user_id,
                                "sender_type": msg.sender_type,
                                "chunk_index": chunk_index,
                                "total_chunks": total_chunks,
                            })
                            .into(),
                        },
                    });
                }
            }
            if let Err(e) = self.client.upsert_to_collection(collection, points).await {
                tracing::warn!(collection = %collection, error = %e, "batch memory embed failed");
            }
        }
        Ok(())
    }
    pub async fn embed_tags_batch(
        &self,
        tags: Vec<super::embeddable::TagEmbedInput>,
    ) -> crate::Result<()> {
        if tags.is_empty() {
            return Ok(());
        }
        let texts: Vec<String> = tags
            .iter()
            .map(|t| {
                if let Some(ref desc) = t.description {
                    if !desc.is_empty() {
                        format!("{}: {}", t.name, desc)
                    } else {
                        t.name.clone()
                    }
                } else {
                    t.name.clone()
                }
            })
            .collect();
        let embeddings = self.client.embed_batch(&texts, &self.model_name).await?;
        let points: Vec<EmbedVector> = tags
            .into_iter()
            .zip(embeddings.into_iter())
            .map(|(tag, vector)| {
                let point_id = format!("{}:{}", tag.repo_id, tag.name);
                EmbedVector {
                    id: point_id,
                    vector,
                    payload: EmbedPayload {
                        entity_type: "repo_tag".to_string(),
                        entity_id: tag.project_id.clone(),
                        text: tag.name.clone(),
                        extra: serde_json::json!({
                            "repo_id": tag.repo_id,
                            "repo_name": tag.repo_name,
                            "tag_name": tag.name,
                            "description": tag.description,
                        })
                        .into(),
                    },
                }
            })
            .collect();
        self.client.upsert(points).await
    }
    /// Thin wrapper around `EmbedClient::embed_memory`.
    ///
    /// Forwards every argument unchanged, adds the service's configured model
    /// name as the last argument, and returns the client's result as-is.
    pub async fn embed_memory(
        &self,
        message_id: &str,
        text: &str,
        project_name: &str,
        room_id: &str,
        user_id: Option<&str>,
    ) -> crate::Result<()> {
        self.client
            .embed_memory(
                message_id,
                text,
                project_name,
                room_id,
                user_id,
                &self.model_name,
            )
            .await
    }
 }
--- a/libs/agent/embed/mod.rs
+++ b/libs/agent/embed/mod.rs
@ -1,90 +0,0 @@
 pub mod chunk;
 pub mod client;
 pub mod embeddable;
 pub mod entity_embed;
 pub mod qdrant;
 pub mod search;
 pub use client::{EmbedClient, EmbedPayload, EmbedVector, SearchResult};
 pub use embeddable::{EmbedMemoryInput, Embeddable, TagEmbedInput};
 pub use qdrant::QdrantClient;
 use std::sync::Arc;
 /// Service bundling the embedding client with the configuration shared by
 /// every embed/search call. Cloning shares the same underlying client.
 #[derive(Clone)]
 pub struct EmbedService {
    /// Shared embedding/vector-store client.
    client: Arc<EmbedClient>,
    /// Database connection exposed through `db()`.
    db: sea_orm::DatabaseConnection,
    /// Embedding model name passed to every embed and search call.
    model_name: String,
    /// Vector size used when creating collections.
    dimensions: u64,
 }
 impl EmbedService {
    /// Builds a service around `client`, remembering the database connection,
    /// the embedding model name and the vector dimensionality.
    pub fn new(
        client: EmbedClient,
        db: sea_orm::DatabaseConnection,
        model_name: String,
        dimensions: u64,
    ) -> Self {
        let client = Arc::new(client);
        Self {
            client,
            db,
            model_name,
            dimensions,
        }
    }

    /// Ensures the fixed entity collections exist, in this order:
    /// `issue`, `repo`, the skill collection, then `repo_tag`.
    ///
    /// Stops at the first failure and returns it.
    pub async fn ensure_collections(&self) -> crate::Result<()> {
        let dims = self.dimensions;
        for entity_type in ["issue", "repo"] {
            self.client.ensure_collection(entity_type, dims).await?;
        }
        self.client.ensure_skill_collection(dims).await?;
        self.client.ensure_collection("repo_tag", dims).await?;
        Ok(())
    }

    /// Database connection this service was constructed with.
    pub fn db(&self) -> &sea_orm::DatabaseConnection {
        &self.db
    }

    /// Shared handle to the underlying embed client.
    pub fn client(&self) -> &Arc<EmbedClient> {
        &self.client
    }

    /// Name of the embedding model used for all calls.
    pub fn model_name(&self) -> &str {
        self.model_name.as_str()
    }

    /// Vector dimensionality used when creating collections.
    pub fn dimensions(&self) -> u64 {
        self.dimensions
    }
 }
 pub async fn new_embed_client(config: &config::AppConfig) -> crate::Result<EmbedClient> {
    let base_url = config
        .get_embed_model_base_url()
        .map_err(|e| crate::AgentError::Internal(e.to_string()))?;
    let api_key = config
        .get_embed_model_api_key()
        .map_err(|e| crate::AgentError::Internal(e.to_string()))?;
    let qdrant_url = config
        .get_qdrant_url()
        .map_err(|e| crate::AgentError::Internal(e.to_string()))?;
    let qdrant_api_key = config.get_qdrant_api_key();
    let openai = rig::providers::openai::Client::builder()
        .api_key(&api_key)
        .base_url(&base_url)
        .build()
        .map_err(|e| {
            crate::AgentError::Internal(format!("failed to build rig openai client: {}", e))
        })?;
    let qdrant = QdrantClient::new(&qdrant_url, qdrant_api_key.as_deref()).await?;
    Ok(EmbedClient::new(openai, qdrant))
 }
--- a/libs/agent/embed/qdrant.rs
+++ b/libs/agent/embed/qdrant.rs
@ -1,373 +0,0 @@
 use qdrant_client::Qdrant;
 use qdrant_client::qdrant::{
    Condition, CreateCollectionBuilder, DeletePointsBuilder, Distance, FieldCondition, Filter,
    Match, PointStruct, SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, Vectors,
    condition::ConditionOneOf, r#match::MatchValue, point_id::PointIdOptions, value,
 };
 use std::collections::HashMap;
 use std::sync::Arc;
 use super::client::{EmbedPayload, SearchResult};
 use crate::embed::client::EmbedVector;
 /// Wrapper around a shared `Qdrant` connection.
 ///
 /// Cloning only bumps the reference count of the inner `Arc`; every clone
 /// talks to the same underlying client.
 #[derive(Clone)]
 pub struct QdrantClient {
    inner: Arc<Qdrant>,
 }
 impl QdrantClient {
    /// Creates a client for the Qdrant instance at `url`, attaching `api_key`
    /// when one is provided.
    ///
    /// # Errors
    /// Returns `AgentError::Qdrant` when the underlying client cannot be built.
    pub async fn new(url: &str, api_key: Option<&str>) -> crate::Result<Self> {
        let builder = match api_key {
            Some(key) => Qdrant::from_url(url).api_key(key),
            None => Qdrant::from_url(url),
        };
        match builder.build() {
            Ok(client) => Ok(Self {
                inner: Arc::new(client),
            }),
            Err(e) => Err(crate::AgentError::Qdrant(e.to_string())),
        }
    }
    /// Maps an entity type to its collection name by prefixing it with `embed_`.
    fn collection_name(entity_type: &str) -> String {
        let mut name = String::from("embed_");
        name.push_str(entity_type);
        name
    }
    /// Generate the collection name for a room's memory vectors.
    ///
    /// The name is `room_memory_` followed by `room_id` with every `-`
    /// replaced by `_`. `project_name` is accepted but does not currently
    /// contribute to the name.
    pub fn room_memory_collection_name(project_name: &str, room_id: &str) -> String {
        let _ = project_name;
        let sanitized: String = room_id
            .chars()
            .map(|c| if c == '-' { '_' } else { c })
            .collect();
        format!("room_memory_{}", sanitized)
    }
    /// Ensures the `embed_{entity_type}` collection exists, creating it with
    /// `dimensions`-sized vectors when it is missing.
    pub async fn ensure_collection(&self, entity_type: &str, dimensions: u64) -> crate::Result<()> {
        let name = Self::collection_name(entity_type);
        self.ensure_collection_named(&name, dimensions).await
    }
    /// Creates the collection `name` with cosine-distance vectors of size
    /// `dimensions` unless it already exists.
    ///
    /// # Errors
    /// Returns `AgentError::Qdrant` when the existence check or the creation fails.
    async fn ensure_collection_named(&self, name: &str, dimensions: u64) -> crate::Result<()> {
        let already_present = self
            .inner
            .collection_exists(name)
            .await
            .map_err(|e| crate::AgentError::Qdrant(e.to_string()))?;
        if !already_present {
            let vectors = VectorParamsBuilder::new(dimensions, Distance::Cosine);
            let request = CreateCollectionBuilder::new(name)
                .vectors_config(vectors)
                .build();
            self.inner
                .create_collection(request)
                .await
                .map_err(|e| crate::AgentError::Qdrant(e.to_string()))?;
        }
        Ok(())
    }
    /// Ensure a room-specific memory collection exists.
    ///
    /// The collection name comes from `room_memory_collection_name`; the
    /// collection is created with `dimensions`-sized vectors when missing.
    pub async fn ensure_room_memory_collection(
        &self,
        project_name: &str,
        room_id: &str,
        dimensions: u64,
    ) -> crate::Result<()> {
        let name = Self::room_memory_collection_name(project_name, room_id);
        self.ensure_collection_named(&name, dimensions).await
    }
    pub async fn upsert_points(&self, points: Vec<EmbedVector>) -> crate::Result<()> {
        if points.is_empty() {
            return Ok(());
        }
        // Reject empty vectors — they cause Qdrant to reject the entire batch
        let empty_vectors = points.iter().filter(|p| p.vector.is_empty()).count();
        if empty_vectors > 0 {
            tracing::error!(
                empty_count = empty_vectors,
                total = points.len(),
                "upsert_points: REJECTING points with empty vectors"
            );
            return Err(crate::AgentError::Qdrant(format!(
                "refusing to upsert {} points with empty vectors",
                empty_vectors
            )));
        }
        let collection_name = Self::collection_name(&points[0].payload.entity_type);
        self.upsert_to_collection(&collection_name, points).await
    }
    /// Upsert points into a specific collection by name.
    pub async fn upsert_to_collection(
        &self,
        collection_name: &str,
        points: Vec<EmbedVector>,
    ) -> crate::Result<()> {
        if points.is_empty() {
            return Ok(());
        }
        let qdrant_points: Vec<PointStruct> = points
            .into_iter()
            .map(|p| {
                let mut payload: HashMap<String, qdrant_client::qdrant::Value> = HashMap::new();
                payload.insert("entity_type".to_string(), p.payload.entity_type.into());
                payload.insert("entity_id".to_string(), p.payload.entity_id.into());
                payload.insert("text".to_string(), p.payload.text.into());
                if let Some(extra) = p.payload.extra {
                    let extra_str = serde_json::to_string(&extra).unwrap_or_default();
                    payload.insert(
                        "extra".to_string(),
                        qdrant_client::qdrant::Value {
                            kind: Some(qdrant_client::qdrant::value::Kind::StringValue(extra_str)),
                        },
                    );
                }
                PointStruct::new(p.id, Vectors::from(p.vector), payload)
            })
            .collect();
        let upsert = UpsertPointsBuilder::new(collection_name, qdrant_points).build();
        self.inner
            .upsert_points(upsert)
            .await
            .map_err(|e| crate::AgentError::Qdrant(e.to_string()))?;
        Ok(())
    }
    /// Returns the string held by a payload value, or an empty string when
    /// the value is not a string (or has no kind at all).
    fn extract_string(value: &qdrant_client::qdrant::Value) -> String {
        if let Some(value::Kind::StringValue(s)) = &value.kind {
            s.clone()
        } else {
            String::new()
        }
    }
    /// Searches the `embed_{entity_type}` collection with `vector`, returning
    /// up to `limit` results. Delegates to `search_collection`.
    pub async fn search(
        &self,
        vector: &[f32],
        entity_type: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        let collection_name = Self::collection_name(entity_type);
        self.search_collection(&collection_name, vector, limit)
            .await
    }
    /// Search a specific collection by name.
    pub async fn search_collection(
        &self,
        collection_name: &str,
        vector: &[f32],
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        let search_req = SearchPointsBuilder::new(collection_name, vector.to_vec(), limit as u64)
            .with_payload(true)
            .build();
        let results = self
            .inner
            .search_points(search_req)
            .await
            .map_err(|e| crate::AgentError::Qdrant(e.to_string()))?;
        Ok(results
            .result
            .into_iter()
            .filter_map(|p| {
                let entity_type = p
                    .payload
                    .get(&"entity_type".to_string())
                    .map(Self::extract_string)
                    .unwrap_or_default();
                let entity_id = p
                    .payload
                    .get(&"entity_id".to_string())
                    .map(Self::extract_string)
                    .unwrap_or_default();
                let text = p
                    .payload
                    .get(&"text".to_string())
                    .map(Self::extract_string)
                    .unwrap_or_default();
                let extra = p.payload.get(&"extra".to_string()).and_then(|v| {
                    let s = Self::extract_string(v);
                    if s.is_empty() {
                        None
                    } else {
                        serde_json::from_str::<serde_json::Value>(&s).ok()
                    }
                });
                let id =
                    p.id.and_then(|id| id.point_id_options)
                        .map(|opts| match opts {
                            PointIdOptions::Uuid(s) => s,
                            PointIdOptions::Num(n) => n.to_string(),
                        })
                        .unwrap_or_default();
                Some(SearchResult {
                    id,
                    score: p.score,
                    payload: EmbedPayload {
                        entity_type,
                        entity_id,
                        text,
                        extra,
                    },
                })
            })
            .collect())
    }
    pub async fn search_with_filter(
        &self,
        vector: &[f32],
        entity_type: &str,
        limit: usize,
        filter: Filter,
    ) -> crate::Result<Vec<SearchResult>> {
        let collection_name = Self::collection_name(entity_type);
        let search = SearchPointsBuilder::new(collection_name, vector.to_vec(), limit as u64)
            .with_payload(true)
            .filter(filter)
            .build();
        let results = self
            .inner
            .search_points(search)
            .await
            .map_err(|e| crate::AgentError::Qdrant(e.to_string()))?;
        Ok(results
            .result
            .into_iter()
            .filter_map(|p| {
                let entity_type = p
                    .payload
                    .get(&"entity_type".to_string())
                    .map(Self::extract_string)
                    .unwrap_or_default();
                let entity_id = p
                    .payload
                    .get(&"entity_id".to_string())
                    .map(Self::extract_string)
                    .unwrap_or_default();
                let text = p
                    .payload
                    .get(&"text".to_string())
                    .map(Self::extract_string)
                    .unwrap_or_default();
                let extra = p.payload.get(&"extra".to_string()).and_then(|v| {
                    let s = Self::extract_string(v);
                    if s.is_empty() {
                        None
                    } else {
                        serde_json::from_str::<serde_json::Value>(&s).ok()
                    }
                });
                let id =
                    p.id.and_then(|id| id.point_id_options)
                        .map(|opts| match opts {
                            PointIdOptions::Uuid(s) => s,
                            PointIdOptions::Num(n) => n.to_string(),
                        })
                        .unwrap_or_default();
                Some(SearchResult {
                    id,
                    score: p.score,
                    payload: EmbedPayload {
                        entity_type,
                        entity_id,
                        text,
                        extra,
                    },
                })
            })
            .collect())
    }
    /// Deletes every point in the `embed_{entity_type}` collection whose
    /// payload `entity_id` equals `entity_id` (keyword match).
    ///
    /// # Errors
    /// Returns `AgentError::Qdrant` when the delete request fails.
    pub async fn delete_by_filter(&self, entity_type: &str, entity_id: &str) -> crate::Result<()> {
        let id_match = Match {
            match_value: Some(MatchValue::Keyword(entity_id.to_string())),
        };
        let field = FieldCondition {
            key: "entity_id".to_string(),
            r#match: Some(id_match),
            ..Default::default()
        };
        let condition = Condition {
            condition_one_of: Some(ConditionOneOf::Field(field)),
        };
        let filter = Filter {
            must: vec![condition],
            ..Default::default()
        };
        let request = DeletePointsBuilder::new(Self::collection_name(entity_type))
            .points(filter)
            .build();
        match self.inner.delete_points(request).await {
            Ok(_) => Ok(()),
            Err(e) => Err(crate::AgentError::Qdrant(e.to_string())),
        }
    }
    /// Drops the whole `embed_{entity_type}` collection.
    ///
    /// # Errors
    /// Returns `AgentError::Qdrant` when the delete request fails.
    pub async fn delete_collection(&self, entity_type: &str) -> crate::Result<()> {
        let target = Self::collection_name(entity_type);
        match self.inner.delete_collection(target).await {
            Ok(_) => Ok(()),
            Err(e) => Err(crate::AgentError::Qdrant(e.to_string())),
        }
    }
    /// Ensures the shared `embed_memory` collection exists with
    /// `dimensions`-sized vectors.
    pub async fn ensure_memory_collection(&self, dimensions: u64) -> crate::Result<()> {
        self.ensure_collection("memory", dimensions).await
    }
    /// Ensures the `embed_skill` collection exists with `dimensions`-sized
    /// vectors.
    pub async fn ensure_skill_collection(&self, dimensions: u64) -> crate::Result<()> {
        self.ensure_collection("skill", dimensions).await
    }
    /// Searches the shared `embed_memory` collection with `vector`, returning
    /// up to `limit` results.
    pub async fn search_memory(
        &self,
        vector: &[f32],
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.search(vector, "memory", limit).await
    }
    /// Searches the `embed_skill` collection with `vector`, returning up to
    /// `limit` results. No project filtering is applied here.
    pub async fn search_skill(
        &self,
        vector: &[f32],
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.search(vector, "skill", limit).await
    }
 }
--- a/libs/agent/embed/search.rs
+++ b/libs/agent/embed/search.rs
@ -1,107 +0,0 @@
 use qdrant_client::qdrant::Filter;
 use super::client::SearchResult;
 /// Vector search operations for Qdrant-backed entity retrieval.
 impl super::EmbedService {
    pub async fn search_issues(
        &self,
        query: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.client
            .search(query, "issue", &self.model_name, limit)
            .await
    }
    pub async fn search_repos(
        &self,
        query: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.client
            .search(query, "repo", &self.model_name, limit)
            .await
    }
    pub async fn search_issues_filtered(
        &self,
        query: &str,
        limit: usize,
        filter: Filter,
    ) -> crate::Result<Vec<SearchResult>> {
        self.client
            .search_with_filter(query, "issue", &self.model_name, limit, filter)
            .await
    }
    /// Search repo tags by semantic similarity within a project.
    /// Filters by project_id (stored in entity_id) for project isolation.
    pub async fn search_tags(
        &self,
        query: &str,
        project_id: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        let mut results = self
            .client
            .search(query, "repo_tag", &self.model_name, limit + 1)
            .await?;
        results.retain(|r| r.payload.entity_id == project_id);
        results.truncate(limit);
        Ok(results)
    }
    /// Search skills by semantic similarity within a project.
    pub async fn search_skills(
        &self,
        query: &str,
        project_uuid: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.client
            .search_skills(query, &self.model_name, project_uuid, limit)
            .await
    }
    /// Search past conversation messages by semantic similarity within a room.
    pub async fn search_memories(
        &self,
        query: &str,
        project_name: &str,
        room_id: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.client
            .search_memories(
                query,
                &self.model_name,
                project_name,
                room_id,
                limit,
                self.dimensions,
            )
            .await
    }
    pub async fn search_memories_after_seq(
        &self,
        query: &str,
        project_name: &str,
        room_id: &str,
        limit: usize,
        after_seq: Option<i64>,
    ) -> crate::Result<Vec<SearchResult>> {
        self.client
            .search_memories_after_seq(
                query,
                &self.model_name,
                project_name,
                room_id,
                limit,
                self.dimensions,
                after_seq,
            )
            .await
    }
 }
--- a/libs/agent/error.rs
+++ b/libs/agent/error.rs
@ -1,63 +0,0 @@
 use thiserror::Error;
 /// Error type of the agent crate.
 ///
 /// `Display` text for each variant comes from its `#[error(...)]` attribute.
 #[derive(Error, Debug)]
 pub enum AgentError {
    /// Failure reported under the "openai error" prefix; the payload is the message text.
    #[error("openai error: {0}")]
    OpenAi(String),
    /// Failure reported under the "qdrant error" prefix; the payload is the message text.
    /// Also produced by the `From<qdrant_client::QdrantError>` conversion in this file.
    #[error("qdrant error: {0}")]
    Qdrant(String),
    /// Catch-all internal failure; the payload is the message text.
    /// Also produced by the `From<sea_orm::DbErr>` conversion in this file.
    #[error("internal error: {0}")]
    Internal(String),
    /// Something that was looked up does not exist; the payload describes what.
    #[error("not found: {0}")]
    NotFound(String),
    /// The task exceeded its timeout limit.
    #[error("task {task_id} timed out after {seconds}s")]
    Timeout { task_id: i64, seconds: u64 },
    /// The agent has been rate-limited; retry after the indicated delay.
    #[error("rate limited, retry after {retry_after_secs}s")]
    RateLimited { retry_after_secs: u64 },
    /// A transient error that can be retried.
    #[error("retryable error (attempt {attempt}): {message}")]
    Retryable { attempt: u32, message: String },
    /// The requested tool is not registered in the tool registry.
    #[error("tool not found: {tool}")]
    ToolNotFound { tool: String },
    /// A tool execution failed.
    #[error("tool '{tool}' execution failed: {cause}")]
    ToolExecutionFailed { tool: String, cause: String },
    /// The request contains invalid input.
    #[error("invalid input in '{field}': {reason}")]
    InvalidInput { field: String, reason: String },
 }
 /// Result alias whose error type is fixed to [`AgentError`].
 pub type Result<T> = std::result::Result<T, AgentError>;
 /// Maps a Qdrant client error to [`AgentError::Qdrant`], keeping only its display text.
 impl From<qdrant_client::QdrantError> for AgentError {
    fn from(err: qdrant_client::QdrantError) -> Self {
        let message = err.to_string();
        Self::Qdrant(message)
    }
 }
 /// Maps a SeaORM database error to [`AgentError::Internal`], keeping only its display text.
 impl From<sea_orm::DbErr> for AgentError {
    fn from(err: sea_orm::DbErr) -> Self {
        let message = err.to_string();
        Self::Internal(message)
    }
 }
 impl From<crate::tool::ToolError> for AgentError {
    fn from(e: crate::tool::ToolError) -> Self {
        AgentError::ToolExecutionFailed {
            tool: String::new(),
            cause: e.to_string(),
        }
    }
 }
--- a/libs/agent/lib.rs
+++ b/libs/agent/lib.rs
@ -1,60 +0,0 @@
 pub mod agent;
 pub mod billing;
 pub mod chat;
 pub mod client;
 pub mod compact;
 pub mod embed;
 pub mod error;
 pub mod model;
 pub mod orao;
 pub mod perception;
 pub mod react;
 pub mod skills;
 pub mod sync;
 pub mod task;
 pub mod tokent;
 pub mod tool;
 pub use billing::{
    BillingRecord, BillingResult, check_balance, check_user_balance, initialize_project_billing,
    initialize_user_billing, persist_billing_error, record_ai_usage, record_user_ai_usage,
 };
 pub use chat::{
    AgentExecutionProfile, AgentRole, AiContextSenderType, AiRequest, AiStreamChunk, ChatService,
    Mention, RoomMessageContext, StreamCallback,
 };
 pub use client::types::ChatRequestMessage;
 pub use client::{AiCallResponse, AiClientConfig, call_with_params, call_with_retry};
 pub use compact::{
    CompactConfig, CompactLevel, CompactService, CompactSummary, MessageSummary,
    RoomCompactContext, RoomCompactRecord,
 };
 pub use embed::{
    EmbedClient, EmbedMemoryInput, EmbedService, QdrantClient, SearchResult, TagEmbedInput,
    new_embed_client,
 };
 pub use error::{AgentError, Result};
 pub use orao::{
    ActionExecutor, ActionResult, ActionType, ActionVerdict, OraoConfig, OraoExecutor,
    OraoExecutorBuilder, OraoOutcome, OraoStep, PerceptionSnapshot, PlannedAction, ReasoningOutput,
    RoundRecord, SafetyLevel,
 };
 pub use perception::{PerceptionService, SkillContext, SkillEntry, ToolCallEvent};
 pub use react::{
    DEFAULT_SYSTEM_PROMPT, PERSONAL_CONTEXT_PROMPT, ROOM_CONTEXT_PROMPT, ReactConfig, ReactStep,
 };
 pub use skills::{
    BuiltInSkill, SKILL_TEMPLATES, all_skill_slugs, all_skills, get_skill, get_skill_by_tool,
    is_built_in_skill, match_skill_by_keyword, skills_by_category,
 };
 pub use sync::list_accessible_models;
 pub use task::TaskService;
 pub use tokent::{TokenUsage, resolve_usage};
 pub use tool::{
    ToolCall, ToolCallRecord, ToolCallRecorder, ToolCallResult, ToolContext, ToolDefinition,
    ToolError, ToolExecutor, ToolHandler, ToolParam, ToolRegistry, ToolResult, ToolSchema,
 };
 #[cfg(feature = "rig")]
 pub use agent::RigAgentService;
 #[cfg(feature = "rig")]
 pub use tool::{RecordingTool, RigToolSet, is_retryable_tool_error};
--- a/libs/agent/model/capability.rs
+++ b/libs/agent/model/capability.rs
@ -1,117 +0,0 @@
 //! Model capability management — CRUD.
 use chrono::Utc;
 use db::database::AppDatabase;
 use models::agents::CapabilityType;
 use models::agents::model_capability;
 use sea_orm::*;
 use crate::error::AgentError;
 // Request payload for `create_capability`.
 #[derive(Debug, Clone, serde::Deserialize, utoipa::ToSchema)]
 pub struct CreateModelCapabilityRequest {
    // Id of the model version the capability is attached to.
    pub model_version_id: i64,
    // Capability name; `create_capability` rejects it unless it parses as `CapabilityType`.
    pub capability: String,
    // Whether the capability is supported; falls back to `bool::default()` (false) when omitted.
    #[serde(default)]
    pub is_supported: bool,
 }
 // Request payload for `update_capability`.
 #[derive(Debug, Clone, serde::Deserialize, utoipa::ToSchema)]
 pub struct UpdateModelCapabilityRequest {
    // New support flag; `None` leaves the stored value untouched.
    pub is_supported: Option<bool>,
 }
 // Serialized view of a `model_capability::Model` row (see the `From` impl below).
 #[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)]
 pub struct ModelCapabilityResponse {
    // Primary key of the capability record.
    pub id: i64,
    // Id of the model version this capability belongs to.
    pub model_version_id: i64,
    // Capability name as stored.
    pub capability: String,
    // Whether the capability is marked as supported.
    pub is_supported: bool,
    // Creation timestamp of the record, in UTC.
    pub created_at: chrono::DateTime<Utc>,
 }
 /// Builds the response by moving the matching fields out of the database model.
 impl From<model_capability::Model> for ModelCapabilityResponse {
    fn from(mc: model_capability::Model) -> Self {
        let model_capability::Model {
            id,
            model_version_id,
            capability,
            is_supported,
            created_at,
            ..
        } = mc;
        Self {
            id,
            model_version_id,
            capability,
            is_supported,
            created_at,
        }
    }
 }
 /// Lists every capability row of one model version, ordered by capability name ascending.
 ///
 /// # Errors
 /// Returns the database error converted into [`AgentError`] if the query fails.
 pub async fn list_capabilities(
    db: &AppDatabase,
    model_version_id: i64,
 ) -> Result<Vec<ModelCapabilityResponse>, AgentError> {
    let by_version = model_capability::Column::ModelVersionId.eq(model_version_id);
    let rows = model_capability::Entity::find()
        .filter(by_version)
        .order_by_asc(model_capability::Column::Capability)
        .all(db)
        .await?;
    let responses: Vec<ModelCapabilityResponse> = rows
        .into_iter()
        .map(ModelCapabilityResponse::from)
        .collect();
    Ok(responses)
 }
 /// Fetches a single capability record by primary key.
 ///
 /// # Errors
 /// * [`AgentError::NotFound`] when no row has the given `id`.
 /// * The database error converted into [`AgentError`] if the query fails.
 pub async fn get_capability(
    db: &AppDatabase,
    id: i64,
 ) -> Result<ModelCapabilityResponse, AgentError> {
    match model_capability::Entity::find_by_id(id).one(db).await? {
        Some(row) => Ok(ModelCapabilityResponse::from(row)),
        None => Err(AgentError::NotFound(format!(
            "Capability record not found: {}",
            id
        ))),
    }
 }
 pub async fn create_capability(
    db: &AppDatabase,
    request: CreateModelCapabilityRequest,
 ) -> Result<ModelCapabilityResponse, AgentError> {
    let _ = request
        .capability
        .parse::<CapabilityType>()
        .map_err(|_| AgentError::InvalidInput {
            field: "capability".into(),
            reason: "Invalid capability type".into(),
        })?;
    let now = Utc::now();
    let active = model_capability::ActiveModel {
        model_version_id: Set(request.model_version_id),
        capability: Set(request.capability),
        is_supported: Set(request.is_supported),
        created_at: Set(now),
        ..Default::default()
    };
    let cap = active.insert(db).await?;
    Ok(ModelCapabilityResponse::from(cap))
 }
 /// Updates the support flag of an existing capability record.
 ///
 /// Loads the row by `id`, sets `is_supported` only when the request carries
 /// a value, then issues the update and returns the resulting row.
 ///
 /// # Errors
 /// * [`AgentError::NotFound`] when no row has the given `id`.
 /// * The database error converted into [`AgentError`] if a query fails.
 pub async fn update_capability(
    db: &AppDatabase,
    id: i64,
    request: UpdateModelCapabilityRequest,
 ) -> Result<ModelCapabilityResponse, AgentError> {
    let existing = match model_capability::Entity::find_by_id(id).one(db).await? {
        Some(found) => found,
        None => {
            return Err(AgentError::NotFound(format!(
                "Capability record not found: {}",
                id
            )));
        }
    };
    let mut pending: model_capability::ActiveModel = existing.into();
    if let Some(flag) = request.is_supported {
        pending.is_supported = Set(flag);
    }
    let updated = pending.update(db).await?;
    Ok(ModelCapabilityResponse::from(updated))
 }
 /// Deletes the capability record with the given primary key.
 ///
 /// The delete result is discarded, so this function does not report
 /// whether a row actually existed.
 ///
 /// # Errors
 /// Returns the database error converted into [`AgentError`] if the delete fails.
 pub async fn delete_capability(db: &AppDatabase, id: i64) -> Result<(), AgentError> {
    let _outcome = model_capability::Entity::delete_by_id(id)
        .exec(db)
        .await?;
    Ok(())
 }
--- a/libs/agent/model/mod.rs
+++ b/libs/agent/model/mod.rs
@ -1,6 +0,0 @@
 pub mod capability;
 pub mod model_entry;
 pub mod parameter_profile;
 pub mod pricing;
 pub mod provider;
 pub mod version;
--- a/Show More
+++ b/Show More
		`@ -1 +0,0 @@`
			`{{/* Secret disabled — all config via ConfigMap */}}`
		`@ -1 +0,0 @@`
			`// Frontend embedding is handled by libs/frontend crate. ci`