feat: 1.0

This commit is contained in:
zhenyi 2026-05-30 01:38:27 +08:00
parent 2b543f5e37
commit e1330451a5
2277 changed files with 22298 additions and 232612 deletions

View File

@ -1,13 +1,79 @@
.git/ # Git
.idea/ .git
.vscode/ .gitignore
node_modules/
*.log
.env
.env.local
.env.*.local
# Exclude all target/ content, then selectively re-include release binaries # IDE
.idea
.vscode
*.swp
*.swo
*~
# Rust build artifacts
target/ target/
!target/release/ **/target/
!target/x86_64-unknown-linux-gnu/release/
# Node.js
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
bun.lockb
# Build output
dist/
build/
# Environment and secrets
.env
.env.*
!.env.example
# Docker
docker/
docker-compose*.yml
.dockerignore
Dockerfile*
*.Dockerfile
# Documentation
*.md
LICENSE
doc/
# Test and CI
tests/
__tests__/
*.test.*
*.spec.*
.github/
.gitlab/
.circleci/
# OS files
.DS_Store
Thumbs.db
desktop.ini
# Logs
logs/
*.log
# Temporary files
tmp/
temp/
.tmp/
# Certificates (use secrets in production)
cert/
# Data directories
data/
# Agent configs
.agent/
.agents/
.claude/
CLAUDE.md
AGENTS.md

View File

@ -1,136 +0,0 @@
# =============================================================================
# Required - 程序启动必须配置
# =============================================================================
# 数据库连接
APP_DATABASE_URL=postgresql://user:password@localhost:5432/dbname
APP_DATABASE_SCHEMA_SEARCH_PATH=public
# Redis（支持多节点，逗号分隔）
APP_REDIS_URL=redis://localhost:6379
# APP_REDIS_URLS=redis://localhost:6379,redis://localhost:6378
# AI 服务
APP_AI_BASIC_URL=https://api.openai.com/v1
APP_AI_API_KEY=sk-xxxxx
# Embedding + 向量检索
APP_EMBED_MODEL_BASE_URL=https://api.openai.com/v1
APP_EMBED_MODEL_API_KEY=sk-xxxxx
APP_EMBED_MODEL_NAME=text-embedding-3-small
APP_EMBED_MODEL_DIMENSIONS=1536
APP_QDRANT_URL=http://localhost:6333
# APP_QDRANT_API_KEY=
# SMTP 邮件
APP_SMTP_HOST=smtp.example.com
APP_SMTP_PORT=587
APP_SMTP_USERNAME=noreply@example.com
APP_SMTP_PASSWORD=xxxxx
APP_SMTP_FROM=noreply@example.com
APP_SMTP_TLS=true
APP_SMTP_TIMEOUT=30
# 文件存储
APP_AVATAR_PATH=/data/avatars
# Git 仓库存储根目录
APP_REPOS_ROOT=/data/repos
# =============================================================================
# Domain / URL可选有默认值
# =============================================================================
APP_DOMAIN_URL=http://127.0.0.1
# APP_STATIC_DOMAIN=
# APP_MEDIA_DOMAIN=
# APP_GIT_HTTP_DOMAIN=
# =============================================================================
# Database Pool可选有默认值
# =============================================================================
# APP_DATABASE_MAX_CONNECTIONS=10
# APP_DATABASE_MIN_CONNECTIONS=2
# APP_DATABASE_IDLE_TIMEOUT=60000 (milliseconds, default: 60s)
# APP_DATABASE_MAX_LIFETIME=300000 (milliseconds, default: 300s)
# APP_DATABASE_CONNECTION_TIMEOUT=5000 (milliseconds, default: 5s)
# APP_DATABASE_REPLICAS=
# APP_DATABASE_HEALTH_CHECK_INTERVAL=30
# APP_DATABASE_RETRY_ATTEMPTS=3
# APP_DATABASE_RETRY_DELAY=5
# =============================================================================
# Redis Pool可选有默认值
# =============================================================================
# APP_REDIS_POOL_SIZE=10
# APP_REDIS_CONNECT_TIMEOUT=5
# APP_REDIS_ACQUIRE_TIMEOUT=5
# =============================================================================
# SSH可选有默认值
# =============================================================================
# APP_SSH_DOMAIN=
# APP_SSH_PORT=22
# APP_SSH_SERVER_PRIVATE_KEY=
# APP_SSH_SERVER_PUBLIC_KEY=
# =============================================================================
# Logging可选有默认值
# =============================================================================
# APP_LOG_LEVEL=info
# APP_LOG_FORMAT=json
# APP_LOG_FILE_ENABLED=false
# APP_LOG_FILE_PATH=./logs
# APP_LOG_FILE_ROTATION=daily
# APP_LOG_FILE_MAX_FILES=7
# APP_LOG_FILE_MAX_SIZE=104857600
# OpenTelemetry可选默认关闭
# APP_OTEL_ENABLED=false
# APP_OTEL_ENDPOINT=http://localhost:5080/api/default/v1/traces
# APP_OTEL_SERVICE_NAME=
# APP_OTEL_SERVICE_VERSION=
# APP_OTEL_AUTHORIZATION=
# APP_OTEL_ORGANIZATION=
# =============================================================================
# NATS / Hook Pool可选有默认值
# =============================================================================
# HOOK_POOL_MAX_CONCURRENT=(CPU 核数)
# HOOK_POOL_CPU_THRESHOLD=80.0
# HOOK_POOL_REDIS_LIST_PREFIX={hook}
# HOOK_POOL_REDIS_LOG_CHANNEL=hook:logs
# HOOK_POOL_REDIS_BLOCK_TIMEOUT=5
# HOOK_POOL_REDIS_MAX_RETRIES=3
# HOOK_POOL_WORKER_ID=(随机 UUID)
# =============================================================================
# Frontend (Vite) — 前端运行环境变量
# =============================================================================
# API 基础 URL（为空时使用 Vite dev 代理 /api -> localhost:8080）
# VITE_API_BASE_URL=http://localhost:8080
# 前端 WebSocket 连接地址(开发模式通过 Vite 代理)
VITE_WS_URL=ws://localhost:5080
# API URL（前端 API 调用，通过 Vite 代理时可为空）
VITE_API_URL=
# WebSocket 连接模式: "raw-ws" | "socketio"
VITE_WS_MODE=raw-ws
# =============================================================================
# Frontend: Grafana Faro (RUM) — 前端性能监控(可选)
# =============================================================================
# VITE_FARO_ENABLED=false
# VITE_FARO_URL=https://faro.example.com/collect
# VITE_FARO_API_KEY=
# VITE_FARO_APP_NAME=GitDataAIWeb
# VITE_FARO_APP_ENV=production
# VITE_FARO_APP_VERSION=0.0.1

83
.gitignore vendored
View File

@ -1,29 +1,62 @@
# Rust build artifacts
/target /target
node_modules **/target/
.claude
.zed # Rust IDE and tooling
.vscode .idea/
.idea .vscode/
*.swp
*.swo
*~
# Environment files
.env .env
.env.local .env.local
dist .env.*.local
deploy/secrets.yaml .env.production
.codex
.qwen # OS files
.opencode .DS_Store
.omc Thumbs.db
AGENT.md desktop.ini
ARCHITECTURE.md
.agents # Node.js
.agents.md
.next
node_modules/ node_modules/
coverage/ npm-debug.log*
.pnpm-store/ yarn-debug.log*
pnpm-lock.yaml yarn-error.log*
package-lock.json pnpm-debug.log*
yarn.lock
.gemini # Build output
.omg dist/
/.sqry build/
deploy/.server.yaml
# Logs
logs/
*.log
# Data and certificates
data/
cert/
# Docker
docker-compose.override.yml
.docker/
# Agent configs
.claude/
.codex/
.agent/
.agents/
CLAUDE.md
AGENTS.md
migrate.sh
# Temporary files
tmp/
temp/
.tmp/
# Backup files
*.bak
*.backup
*~

View File

@ -1,11 +0,0 @@
{
"mcpServers": {
"shadcn": {
"command": "npx",
"args": [
"shadcn@latest",
"mcp"
]
}
}
}

View File

@ -1,7 +0,0 @@
node_modules/
coverage/
.pnpm-store/
pnpm-lock.yaml
package-lock.json
pnpm-lock.yaml
yarn.lock

View File

@ -1,11 +0,0 @@
{
"endOfLine": "lf",
"semi": false,
"singleQuote": false,
"tabWidth": 2,
"trailingComma": "es5",
"printWidth": 80,
"plugins": ["prettier-plugin-tailwindcss"],
"tailwindStylesheet": "src/index.css",
"tailwindFunctions": ["cn", "cva"]
}

7059
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,192 +1,5 @@
[workspace]
members = [
# "libs/frontend",
"libs/models",
"libs/session",
"libs/git",
"libs/email",
"libs/queue",
"libs/room",
"libs/config",
"libs/service",
"libs/db",
"libs/api",
"libs/transport",
"libs/observability",
"libs/avatar",
"libs/agent",
"libs/migrate",
"libs/fctool",
"libs/gingress-proxy",
"apps/migrate",
"apps/app",
"apps/git-hook",
"apps/gitserver",
"apps/email",
"apps/static",
"apps/metrics",
"apps/gingress",
]
resolver = "3"
[workspace.dependencies]
models = { path = "libs/models" }
session = { path = "libs/session" }
git = { path = "libs/git" }
email = { path = "libs/email" }
queue = { path = "libs/queue" }
room = { path = "libs/room" }
config = { path = "libs/config" }
service = { path = "libs/service" }
db = { path = "libs/db" }
api = { path = "libs/api" }
agent = { path = "libs/agent" }
observability = { path = "libs/observability" }
avatar = { path = "libs/avatar" }
migrate = { path = "libs/migrate" }
fctool = { path = "libs/fctool" }
transport = { path = "libs/transport" }
metrics-aggregator = { path = "apps/metrics" }
gingress-proxy = { path = "libs/gingress-proxy" }
gingress = { path = "apps/gingress" }
sea-query = "1.0.0-rc.33"
actix-web = "4.13.0"
actix-files = "0.6.10"
actix-cors = "0.7.1"
actix-session = "0.11.0"
actix-ws = "0.4.0"
actix-multipart = "0.7.2"
actix-analytics = "1.2.1"
actix-jwt-session = "1.0.7"
actix-csrf = "0.8.0"
metrics = "0.24.5"
actix-rt = "2.11.0"
actix = "0.13"
async-stream = "0.3"
actix-service = "2.0.3"
actix-utils = "3.0.1"
redis = "1.1.0"
anyhow = "1.0.102"
derive_more = "2.1.1"
blake3 = "1.8.3"
argon2 = "0.5.3"
thiserror = "2.0.18"
password-hash = "0.6.0"
awc = "3.8.2"
bstr = "1.12.1"
captcha-rs = "0.5.0"
deadpool-redis = "0.23.0"
deadpool = "0.13.0"
dotenv = "0.15.0"
env_logger = "0.11.10"
brotli = "7.0"
flate2 = "1.1.9"
git2 = "0.20.4"
slog = "2.8.2"
git2-ext = "1.0.0"
git2-hooks = "0.7.0"
futures = "0.3.32"
futures-util = "0.3.32"
globset = "0.4.18"
hex = "0.4.3"
lettre = { version = "0.11.19", default-features = false, features = ["tokio1-rustls-tls", "smtp-transport", "builder", "pool"] }
mime = "0.3.17"
mime_guess2 = "2.3.1"
opentelemetry = "0.31.0"
opentelemetry-otlp = { version = "0.31.0", features = ["http-proto", "trace"] }
opentelemetry_sdk = { version = "0.31.0", features = ["rt-tokio"] }
opentelemetry-http = "0.31.0"
prost = "0.14.3"
prost-build = "0.14.3"
qdrant-client = "1.17.0"
prost-types = "0.14.3"
rand = "0.10.0"
russh = { version = "0.60.2", default-features = false, features = ["ring", "rsa"] }
hmac = { version = "0.13" }
hkdf = "0.13.0"
sha1_smol = "1.0.1"
rsa = { version = "0.9.7", package = "rsa" }
reqwest = { version = "0.13.2", default-features = false }
dotenvy = "0.15.7"
# aws-lc-sys requires NASM on Windows, so we use local filesystem storage instead of S3
# aws-sdk-s3 = "1.127.0"
sea-orm = "2.0.0-rc.37"
sea-orm-migration = "2.0.0-rc.37"
sha1 = "0.11"
sha2 = "0.11"
sysinfo = "0.39.1"
ssh-key = "0.7.0-rc.9"
tar = "0.4.45"
zip = "8.3.1"
tokenizer = "0.1.2"
tiktoken-rs = "0.11.0"
regex = "1.12.3"
jsonwebtoken = "10.3.0"
once_cell = "1.21.4"
async-trait = "0.1.89"
fs2 = "0.4.3"
image = "0.25.10"
tokio = "1.50.0"
tokio-util = "0.7.18"
tokio-stream = { version = "0.1.18", features = ["sync"] }
url = "2.5.8"
tower = "0.5"
num_cpus = "1.17.0"
ring = "0.17"
rustls = { version = "0.23", default-features = false, features = ["ring", "std", "tls12"] }
clap = "4.6.0"
time = "0.3.47"
chrono = "0.4.44"
tracing = "0.1.44"
tracing-subscriber = { version = "0.3.23", features = ["env-filter", "json", "tracing-log"] }
tracing-opentelemetry = "0.32.1"
tonic = "0.14.5"
tonic-build = "0.14.5"
uuid = "1.22.0"
hostname = "0.4"
utoipa = { version = "5.4.0", features = ["chrono", "uuid"] }
rust_decimal = "1.40.0"
walkdir = "2.5.0"
calamine = "0.26"
csv = "1.3"
lopdf = "0.34"
pulldown-cmark = "0.12"
quick-xml = { version = "0.37", features = ["serialize"] }
sqlparser = "0.55"
lazy_static = "1.5"
chacha20poly1305 = "0.10"
md5 = "0.7"
moka = "0.12.15"
dashmap = "7.0.0-rc2"
serde = "1.0.228"
serde_json = "1.0.149"
serde_yaml = "0.9.33"
serde_bytes = "0.11.19"
phf = "0.13.1"
phf_codegen = "0.13.1"
base64 = "0.22.1"
base64ct = "1"
p256 = { version = "0.13", features = ["ecdsa", "std"] }
# http version varies per-crate (pingora needs 1.x, actix needs 0.2)
hyper = "0.14"
tempfile = "3"
rig-core = { version = "0.36.0", default-features = false }
tokio-tungstenite = { version = "0.29.0", features = [] }
async-nats = { version = "0.48.0", features = [] }
kube = { version = "3.1.0", features = ["runtime", "derive"] }
k8s-openapi = { version = "0.27", features = ["v1_31"] }
pingora = { version = "0.8", features = ["proxy"] }
pingora-proxy = "0.8"
pingora-load-balancing = "0.8"
pingora-cache = "0.8"
rustls-pemfile = "2"
[workspace.package] [workspace.package]
version = "0.2.9" version = "1.0.0"
edition = "2024" edition = "2024"
authors = [] authors = []
description = "" description = ""
@ -198,6 +11,31 @@ keywords = []
categories = [] categories = []
documentation = "" documentation = ""
[workspace]
members = [
"app/email",
"app/gitdata",
"app/gitpod",
"app/gitsync",
"lib/ai",
"lib/api",
"lib/cache",
"lib/channel",
"lib/config",
"lib/db",
"lib/email",
"lib/git",
"lib/issues",
"lib/migrate",
"lib/model",
"lib/queue",
"lib/service",
"lib/session",
"lib/storage",
"lib/parsefile"
, "lib/socketio"]
resolver = "3"
[workspace.lints.rust] [workspace.lints.rust]
unsafe_code = "warn" unsafe_code = "warn"
@ -205,36 +43,105 @@ unsafe_code = "warn"
unwrap_used = "warn" unwrap_used = "warn"
expect_used = "warn" expect_used = "warn"
[profile.dev] [workspace.dependencies]
debug = 1 ai = { path = "lib/ai" }
incremental = true api = { path = "lib/api" }
codegen-units = 256 cache = { path = "lib/cache" }
channel = { path = "lib/channel" }
config = { path = "lib/config" }
db = { path = "lib/db" }
email = { path = "lib/email" }
git = { path = "lib/git" }
issues = { path = "lib/issues" }
migrate = { path = "lib/migrate" }
model = { path = "lib/model" }
queue = { path = "lib/queue" }
service = { path = "lib/service" }
session = { path = "lib/session" }
storage = { path = "lib/storage" }
parsefile = { path = "lib/parsefile"}
socketio = { path = "lib/socketio" }
[profile.release] leptos = "0.8.19"
lto = "thin" leptos_actix = "0.8.7"
codegen-units = 1 leptos_meta = "0.8.6"
strip = true leptos_router = "0.8.13"
opt-level = 3 server_fn = { version = "0.8.10", features = ["actix"] }
actix-http = "3.11"
actix-ws = "0.4.0"
urlencoding = "2.1"
serde_urlencoded = "0.7"
juniper = "0.17.1"
[profile.dev.package.num-bigint-dig] ractor = "0.15.13"
opt-level = 3 ractor_cluster = "0.15.13"
async-nats = "0.48.0"
petgraph = "0.8.3"
[package] async-openai = "0.40.0"
name = "workspace" rig-core = { version = "0.36.0", default-features = false, features = ["derive"] }
version.workspace = true schemars = "1.2.1"
edition.workspace = true tokio-stream = "0.1.18"
authors.workspace = true duct = "1.1.1"
description.workspace = true lettre = "0.11.22"
repository.workspace = true actix-web = "4"
readme.workspace = true jsonwebtoken = { version = "10.4.0", features = ["rust_crypto"] }
homepage.workspace = true futures-util = "0.3"
license.workspace = true futures = "0.3.32"
keywords.workspace = true moka = "0.12.15"
categories.workspace = true tokio = "1.52.3"
documentation.workspace = true redis = "1.2.1"
serde_json = "1.0.149"
[lib] indexmap = "2.14.0"
path = "lib.rs" sea-orm-migration = "2.0.0-rc.38"
crate-type = ["lib"] sea-orm = { version = "2.0.0-rc.38", features = ["sqlx-all","runtime-tokio","rust_decimal","uuid","chrono"]}
async-trait = "0.1.89"
aws-config = "1.8.16"
aws-sdk-s3 = "1.132.0"
rust_decimal = "1.42.0"
utoipa = "5.5.0"
dotenvy = "0.15.7"
anyhow = "1.0.102"
derive_more = "2.1.1"
serde = "1.0.228"
serde_yaml = "0.9.33"
comrak = "0.38"
sqlparser = "0.62.0"
qdrant-client = "1.18.0"
tiktoken-rs = "0.11.0"
tracing-subscriber = "0.3.23"
thiserror = "2.0.18"
uuid = "1.23.1"
git2 = "0.21.0"
gix = { version = "0.83.0", features = ["max-performance-safe", "serde", "merge", "blame", "revision", "blob-diff", "worktree-stream", "worktree-archive", "mailmap"] }
gix-archive = "0.32.0"
gix-worktree-stream = "0.32.0"
num_cpus = "1.17.0"
tracing = "0.1.44"
actix-service = "2.0.3"
actix-rt = "2.11.0"
actix-utils = "3.0.1"
toasty = "0.6.1"
chrono = "0.4.44"
argon2 = "0.5.3"
rand = "0.10.1"
rand_core = { version = "0.10.1", features = ["getrandom"] }
totp-rs = "5.7.1"
url = "2.5.7"
sha2 = "0.11.0"
base64 = "0.22"
tonic = "0.14.6"
tonic-build = "0.14.6"
prost = "0.14.3"
tonic-prost = "0.14.6"
dashmap = "6"
sqlx = "0.9.0"
russh = { version = "0.61.1", features = ["legacy-ed25519-pkcs8-parser"] }
hex = "0.4"
async-stream = "0.3"
tokio-util = "0.7"
password-hash = "0.6.1"
deadpool-redis = { version = "0.23", features = ["cluster"] }
reqwest = { version = "0.13", features = ["json", "rustls", "system-proxy"] }
hmac = "0.13"
mcpkit = "0.5"
miette = "7"

View File

@ -1,21 +0,0 @@
# React + TypeScript + Vite + shadcn/ui
This is a template for a new Vite project with React, TypeScript, and shadcn/ui.
## Adding components
To add components to your app, run the following command:
```bash
npx shadcn@latest add button
```
This will place the ui components in the `src/components` directory.
## Using components
To use the components in your app, import them as follows:
```tsx
import { Button } from "@/components/ui/button"
```

View File

@ -1,39 +0,0 @@
[package]
name = "app"
version.workspace = true
edition.workspace = true
authors.workspace = true
description.workspace = true
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[dependencies]
tokio = { workspace = true, features = ["full"] }
uuid = { workspace = true }
service = { workspace = true }
observability = { workspace = true }
room = { workspace = true }
sha2 = { workspace = true }
hkdf = { workspace = true }
hmac = { workspace = true }
api = { workspace = true }
session = { workspace = true }
config = { workspace = true }
db = { workspace = true }
migrate = { workspace = true }
actix-web = { workspace = true }
actix-cors = { workspace = true }
futures = { workspace = true }
tracing = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true }
sea-orm = { workspace = true }
serde_json = { workspace = true }
chrono = { workspace = true }
[lints]
workspace = true

View File

@ -1,12 +0,0 @@
use clap::Parser;
// Command-line options for the `app` binary, parsed through clap's derive API.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's `--help` output.
#[derive(Parser, Debug)]
#[command(name = "app")]
#[command(version)]
pub struct ServerArgs {
// Optional string taken from `--bind` (a short flag is also generated).
// NOTE(review): presumably the listen address — confirm against the caller.
#[arg(long, short)]
pub bind: Option<String>,
// Optional count taken from `--workers`.
// NOTE(review): presumably the HTTP worker count — confirm where it is consumed.
#[arg(long)]
pub workers: Option<usize>,
}

View File

@ -1,133 +0,0 @@
//! Structured HTTP request logging middleware using tracing.
//!
//! Logs every incoming request with method, path, status code,
//! response time, client IP, authenticated user ID, and trace_id.
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
use futures::future::{LocalBoxFuture, Ready, ok};
use session::SessionExt;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Instant;
use uuid::Uuid;
/// Factory for the structured request-logging middleware.
///
/// Holds the name of the HTTP header associated with trace ids; the value is
/// copied into every [`RequestLoggerMiddleware`] this factory creates.
pub struct RequestLogger {
    trace_id_header: String,
}

impl RequestLogger {
    /// Creates a logger factory for the given trace-id header name.
    ///
    /// Accepts anything convertible into a `String` (`String`, `&str`, ...),
    /// so existing callers that pass an owned `String` keep working.
    pub fn new(trace_id_header: impl Into<String>) -> Self {
        Self {
            trace_id_header: trace_id_header.into(),
        }
    }
}
/// Plugs [`RequestLogger`] into the actix-web middleware pipeline.
impl<S, B> Transform<S, ServiceRequest> for RequestLogger
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Transform = RequestLoggerMiddleware<S>;
    type InitError = ();
    type Future = Ready<Result<Self::Transform, Self::InitError>>;

    /// Wraps `service` in a [`RequestLoggerMiddleware`]; construction cannot fail,
    /// so the returned future is immediately ready.
    fn new_transform(&self, service: S) -> Self::Future {
        let trace_id_header = self.trace_id_header.clone();
        let middleware = RequestLoggerMiddleware {
            service: Arc::new(service),
            trace_id_header,
        };
        ok(middleware)
    }
}
/// Per-service request logger produced by `RequestLogger`.
///
/// The wrapped service sits behind an `Arc` so it can be moved into the
/// `'static` future created for each request.
pub struct RequestLoggerMiddleware<S> {
    service: Arc<S>,
    trace_id_header: String,
}

// Implemented by hand so that cloning does not require `S: Clone`;
// only the `Arc` handle and the header name are duplicated.
impl<S> Clone for RequestLoggerMiddleware<S> {
    fn clone(&self) -> Self {
        let service = Arc::clone(&self.service);
        let trace_id_header = self.trace_id_header.to_owned();
        Self {
            service,
            trace_id_header,
        }
    }
}
impl<S, B> Service<ServiceRequest> for RequestLoggerMiddleware<S>
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&self, req: ServiceRequest) -> Self::Future {
let started = Instant::now();
let trace_id_header = self.trace_id_header.clone();
let method = req.method().to_string();
let path = req.path().to_string();
let query = req.query_string().to_string();
let remote = req
.connection_info()
.realip_remote_addr()
.map(|s| s.to_string())
.unwrap_or_else(|| "unknown".to_string());
let user_id: Option<Uuid> = req.get_session().user();
let trace_id = Uuid::now_v7().to_string();
let full_path = if query.is_empty() {
path.clone()
} else {
format!("{}?{}", path, query)
};
let service = self.service.clone();
Box::pin(async move {
let res = service.call(req).await?;
let elapsed = started.elapsed();
let status = res.status();
let status_code = status.as_u16();
let is_health = path == "/health";
if !is_health {
let user_id_str = user_id
.map(|u: Uuid| u.to_string())
.unwrap_or_else(|| "-".to_string());
let duration_ms = elapsed.as_millis() as u64;
let log_args = (
method = %method,
path = %full_path,
status = status_code,
duration_ms = duration_ms,
remote = %remote,
user_id = %user_id_str,
trace_id = %trace_id,
);
match status_code {
200..=299 => {
tracing::info!(log_args, "http_request");
}
400..=499 => {
tracing::warn!(log_args, "http_request");
}
_ => {
tracing::error!(log_args, "http_request");
}
}
}
Ok(res)
})
}
}

View File

@ -1,350 +0,0 @@
use actix_cors::Cors;
use actix_web::cookie::time::Duration;
use actix_web::dev::{Service, ServiceRequest, ServiceResponse};
use actix_web::{App, HttpResponse, HttpServer, cookie::Key, web};
use api::{robots, sidemap};
use clap::Parser;
use db::cache::AppCache;
use db::database::AppDatabase;
use futures::future::LocalBoxFuture;
use observability::{
HttpMetrics, HttpSnapshotGuard, MetricsMiddleware, TracingSpanMiddleware,
init_tracing_subscriber, install_recorder, prometheus_handler, push::MetricsPusher,
spawn_http_metrics_poller,
};
use sea_orm::ConnectionTrait;
use service::AppService;
use session::SessionMiddleware;
use session::config::{PersistentSession, SessionLifecycle, TtlExtensionPolicy};
use session::storage::RedisClusterSessionStore;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Instant;
mod args;
use args::ServerArgs;
use config::AppConfig;
use migrate::{Migrator, MigratorTrait};
/// Bundle of shared handles that `main` registers as actix-web application
/// data and that `health_check` reads to probe its dependencies.
#[derive(Clone)]
pub struct AppState {
/// Database handle; `db_ping` uses its `writer()` and `reader()` connections.
pub db: AppDatabase,
/// Cache handle; `cache_ping` acquires a connection from it via `conn()`.
pub cache: AppCache,
}
/// Middleware factory for request logging that leaves noisy paths out.
struct RequestLogger;

// Builds one `RequestLoggerService` around each inner service.
impl<S, B> actix_web::dev::Transform<S, ServiceRequest> for RequestLogger
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Transform = RequestLoggerService<S>;
    type InitError = ();
    type Future = futures::future::Ready<Result<Self::Transform, Self::InitError>>;

    /// Construction is infallible, so the future resolves immediately.
    fn new_transform(&self, service: S) -> Self::Future {
        let wrapped = RequestLoggerService {
            service,
            _marker: std::marker::PhantomData,
        };
        futures::future::ready(Ok(wrapped))
    }
}
/// Service wrapper created by `RequestLogger`; logs each request it lets through.
struct RequestLoggerService<S> {
    service: S,
    _marker: std::marker::PhantomData<fn(ServiceRequest)>,
}

impl<S, B> actix_web::dev::Service<ServiceRequest> for RequestLoggerService<S>
where
    S: actix_web::dev::Service<
            ServiceRequest,
            Response = ServiceResponse<B>,
            Error = actix_web::Error,
        >,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;

    /// Readiness is delegated to the wrapped service.
    fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.service.poll_ready(cx)
    }

    /// Runs the wrapped service and emits one `http_request` info event.
    ///
    /// Requests to `/health`, `/metrics`, and any path starting with `/ws` or
    /// `/assets` are not logged. Errors from the wrapped service are propagated
    /// without logging.
    fn call(&self, req: ServiceRequest) -> Self::Future {
        let request_path = req.path();
        let is_noisy = request_path == "/health"
            || request_path == "/metrics"
            || request_path.starts_with("/ws")
            || request_path.starts_with("/assets");
        // Only pay for the owned copies when the request will actually be logged.
        let logged = (!is_noisy).then(|| (req.method().to_string(), request_path.to_string()));

        let start = Instant::now();
        let fut = self.service.call(req);
        Box::pin(async move {
            let res = fut.await?;
            if let Some((method, path)) = logged {
                // Sample status and elapsed time once so the structured fields
                // and the formatted message always agree.
                let status = res.status().as_u16();
                let elapsed = start.elapsed();
                tracing::info!(
                    target: "http_request",
                    method = %method,
                    path = %path,
                    status = status,
                    elapsed = ?elapsed,
                    "{} {} {} {:?}",
                    method,
                    path,
                    status,
                    elapsed
                );
            }
            Ok(res)
        })
    }
}
/// Produces the key handed to the session middleware.
///
/// * `APP_SESSION_SECRET` missing: warn and return a freshly generated key.
/// * Secret shorter than 32 bytes: warn and return a freshly generated key.
/// * Otherwise: stretch the secret to 64 bytes with HKDF-SHA256 and build the
///   key from that output.
///
/// # Errors
/// Returns an error only if the HKDF expand step fails.
fn build_session_key(cfg: &AppConfig) -> anyhow::Result<Key> {
    use hkdf::Hkdf;
    use sha2::Sha256;

    let Some(secret) = cfg.env.get("APP_SESSION_SECRET") else {
        tracing::warn!(
            "APP_SESSION_SECRET not set, using generated key (sessions invalidated on restart)"
        );
        return Ok(Key::generate());
    };

    if secret.len() < 32 {
        tracing::warn!(
            secret_len = secret.len(),
            "APP_SESSION_SECRET is too short (<32 bytes), using generated key instead"
        );
        return Ok(Key::generate());
    }

    // HKDF-SHA256: the salt and info strings provide domain separation.
    let mut okm = [0u8; 64];
    Hkdf::<Sha256>::new(Some(b"session-cookie-key"), secret.as_bytes())
        .expand(b"actix-session-signing-key", &mut okm)
        .map_err(|e| anyhow::anyhow!("HKDF expand failed: {}", e))?;
    Ok(Key::from(&okm))
}
/// Entry point of the `app` server.
///
/// Order of operations: load config, set up tracing, connect the database,
/// the Redis session store and the cache, run migrations, build the session
/// key, parse CLI args, start background tasks, set up metrics, then run the
/// HTTP server until it stops and finally signal the room workers to shut down.
///
/// # Errors
/// Any failing initialisation step, a bind failure, or a server error is
/// returned through `?`.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let cfg = AppConfig::load();
// Fall back to "info" / disabled when the config accessors return an error.
let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string());
let otel_enabled = cfg.otel_enabled().unwrap_or(false);
// NOTE(review): meaning of the `false` argument is not visible here — confirm.
init_tracing_subscriber(&log_level, false);
tracing::info!(
app_name = %cfg.app_name().unwrap_or_default(),
app_version = %cfg.app_version().unwrap_or_default(),
"Starting application"
);
let db = AppDatabase::init(&cfg).await?;
tracing::info!("Database connected");
// Session store backed by the Redis URLs from the config.
let redis_urls = cfg.redis_urls()?;
let store: RedisClusterSessionStore = RedisClusterSessionStore::new(redis_urls).await?;
tracing::info!("Redis connected");
let cache = AppCache::init(&cfg).await?;
tracing::info!("Cache initialized");
// Migrations run before the server starts accepting requests.
run_migrations(&db).await?;
let session_key = build_session_key(&cfg)?;
// NOTE(review): only `args.bind` is read below; `args.workers` is unused in this function.
let args = ServerArgs::parse();
let service = AppService::new(cfg.clone()).await?;
tracing::info!("AppService initialized");
// The handle is bound (not discarded with `_`) so it lives until `main` returns.
let _model_sync_handle = service.clone().start_sync_task();
// TODO: workspace module not yet wired — billing alert task pending
// let _billing_alert_handle = service.clone().start_billing_alert_task();
// Broadcast channel used to tell the room workers to stop after the server exits.
let (shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel::<()>(1);
let worker_service = service.clone();
let worker_handle =
tokio::spawn(async move { worker_service.start_room_workers(shutdown_rx).await });
// OTLP exporter is optional; the guard is kept alive until the end of `main`.
let _otel_guard = if otel_enabled {
let endpoint = cfg
.otel_endpoint()
.unwrap_or_else(|_| "http://localhost:4317".to_string());
let service_name = cfg
.otel_service_name()
.unwrap_or_else(|_| "app".to_string());
let service_version = cfg
.otel_service_version()
.unwrap_or_else(|_| "0.1.0".to_string());
tracing::info!(endpoint = %endpoint, service = %service_name, "OTLP tracing enabled");
let guard =
observability::init_otlp(&endpoint, &service_name, &service_version, &log_level)
.map_err(|e| anyhow::anyhow!("OTLP init failed: {}", e))?;
guard
} else {
None
};
// Prometheus recorder plus an HTTP metrics snapshot refreshed every 15 seconds.
let prometheus_handle = install_recorder();
let prometheus_handle_data = web::Data::new(prometheus_handle.clone());
let http_metrics = std::sync::Arc::new(HttpMetrics::new());
let http_snapshot: HttpSnapshotGuard = std::sync::Arc::new(std::sync::RwLock::new(
observability::HttpMetricsSnapshot::default(),
));
let http_snapshot_for_poller = http_snapshot.clone();
spawn_http_metrics_poller(
http_metrics.clone(),
http_snapshot_for_poller,
std::time::Duration::from_secs(15),
);
let http_snapshot_data = web::Data::new(http_snapshot);
// Metrics pusher: periodically push all metrics to apps/metrics aggregator
// Only started when the METRICS_PUSH_URL environment variable is set.
if let Some(push_url) = std::env::var("METRICS_PUSH_URL").ok() {
let pusher = MetricsPusher::new(&push_url, "app");
pusher.spawn(
http_metrics.clone(),
Arc::new(prometheus_handle.clone()),
std::time::Duration::from_secs(15),
);
tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
}
// `--bind` wins; otherwise listen on loopback port 8080.
let bind_addr = args.bind.unwrap_or_else(|| "127.0.0.1:8080".to_string());
tracing::info!(bind_addr = %bind_addr, "Listening");
let http_metrics_server = http_metrics.clone();
// CORS_ORIGINS is a comma-separated list; entries are trimmed and empty ones dropped.
// When the key is absent, a single default origin is used.
let cors_origins: Vec<String> = cfg
.env
.get("CORS_ORIGINS")
.map(|s| {
s.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect()
})
.unwrap_or_else(|| vec!["http://localhost:5173".to_string()]);
// Secure cookies are on unless APP_COOKIE_SECURE is exactly the string "false".
let cookie_secure = cfg
.env
.get("APP_COOKIE_SECURE")
.map(|s| s != "false")
.unwrap_or(true);
tracing::info!(cookie_secure = cookie_secure, "Cookie secure mode");
// The closure below builds an `App`; everything it captures is cloned inside it.
HttpServer::new(move || {
let mut cors = Cors::default();
for origin in &cors_origins {
cors = cors.allowed_origin(origin);
}
let cors = cors
.allowed_methods(["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"])
.allowed_headers([
"Content-Type",
"Authorization",
"X-Requested-With",
"Accept",
"Origin",
])
.supports_credentials()
.max_age(3600);
let security_headers = actix_web::middleware::DefaultHeaders::new()
.add(("X-Content-Type-Options", "nosniff"))
.add(("X-Frame-Options", "DENY"))
.add(("Referrer-Policy", "strict-origin-when-cross-origin"));
// Persistent session cookie named "id": 30-day TTL, extended on every request.
let session_mw = SessionMiddleware::builder(store.clone(), session_key.clone())
.cookie_name("id".to_string())
.cookie_path("/".to_string())
.cookie_secure(cookie_secure)
.cookie_http_only(true)
.session_lifecycle(SessionLifecycle::PersistentSession(
PersistentSession::default()
.session_ttl(Duration::days(30))
.session_ttl_extension_policy(TtlExtensionPolicy::OnEveryRequest),
))
.build();
let metrics_mw = MetricsMiddleware::new(http_metrics_server.clone());
// NOTE(review): actix-web documents that the middleware registered last runs
// first on the way in, so the order of the `.wrap` calls matters — confirm it is intended.
App::new()
.wrap(cors)
.wrap(security_headers)
.wrap(session_mw)
.wrap(RequestLogger)
.wrap(metrics_mw)
.wrap(TracingSpanMiddleware::new())
.app_data(web::Data::new(AppState {
db: db.clone(),
cache: cache.clone(),
}))
.app_data(web::Data::new(service.clone()))
.app_data(web::Data::new(cfg.clone()))
.app_data(web::Data::new(db.clone()))
.app_data(web::Data::new(cache.clone()))
.app_data(http_snapshot_data.clone())
.app_data(prometheus_handle_data.clone())
.route("/robots.txt", web::get().to(robots::robots))
.route("/sitemap.xml", web::get().to(sidemap::sitemap))
.service(
web::scope("/sidemap")
.route("", web::get().to(sidemap::sitemap))
.route("/static", web::get().to(sidemap::sitemap_static))
.route("/users", web::get().to(sidemap::sitemap_users))
.route("/projects", web::get().to(sidemap::sitemap_projects))
.route("/repos", web::get().to(sidemap::sitemap_repos)),
)
.route("/health", web::get().to(health_check))
.route("/metrics", web::get().to(prometheus_handler))
.configure(api::route::init_routes)
})
// A bind or run error returns early through `?`, skipping the explicit shutdown below.
.bind(&bind_addr)?
.run()
.await?;
tracing::info!("Server stopped, shutting down room workers");
// Send and join results are deliberately ignored: shutdown is best-effort.
let _ = shutdown_tx.send(());
let _ = worker_handle.await;
tracing::info!("Room workers stopped");
Ok(())
}
/// Applies all pending migrations through the writer connection.
///
/// # Errors
/// Returns `"Migration failed: ..."` carrying the debug form of the migrator error.
async fn run_migrations(db: &AppDatabase) -> anyhow::Result<()> {
    tracing::info!("Running database migrations...");

    if let Err(err) = Migrator::up(db.writer(), None).await {
        return Err(anyhow::anyhow!("Migration failed: {:?}", err));
    }

    tracing::info!("Migrations completed");
    Ok(())
}
/// `GET /health` handler.
///
/// Probes the database and then the cache. Responds `200` with
/// `status: "ok"` when both succeed, otherwise `503` with
/// `status: "unhealthy"`; each dependency is reported as `"ok"` or `"error"`.
async fn health_check(state: web::Data<AppState>) -> HttpResponse {
    let db_ok = db_ping(&state.db).await;
    let cache_ok = cache_ping(&state.cache).await;

    let label = |up: bool| if up { "ok" } else { "error" };
    let (mut response, overall) = if db_ok && cache_ok {
        (HttpResponse::Ok(), "ok")
    } else {
        (HttpResponse::ServiceUnavailable(), "unhealthy")
    };

    response.json(serde_json::json!({
        "status": overall,
        "db": label(db_ok),
        "cache": label(cache_ok),
    }))
}
/// Runs `SELECT 1` on the writer and then on the reader connection.
///
/// Both statements are always executed; returns `true` only if both succeed.
async fn db_ping(db: &AppDatabase) -> bool {
    let writer_result = db.writer().execute_unprepared("SELECT 1").await;
    let reader_result = db.reader().execute_unprepared("SELECT 1").await;
    matches!((writer_result, reader_result), (Ok(_), Ok(_)))
}
/// Reports whether a connection can be obtained from the cache.
async fn cache_ping(cache: &AppCache) -> bool {
    matches!(cache.conn().await, Ok(_))
}

View File

@ -1,36 +0,0 @@
[package]
name = "email-server"
version.workspace = true
edition.workspace = true
authors.workspace = true
description.workspace = true
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[[bin]]
name = "email-worker"
path = "src/main.rs"
[dependencies]
tokio = { workspace = true, features = ["full"] }
service = { workspace = true }
db = { workspace = true }
config = { workspace = true }
tracing = { workspace = true }
observability = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
chrono = { workspace = true, features = ["serde"] }
hyper = { workspace = true }
serde_json = { workspace = true }
sea-orm = { workspace = true }
metrics = "0.22"
metrics-exporter-prometheus = "0.13"
[lints]
workspace = true

View File

@ -1,165 +0,0 @@
use clap::Parser;
use config::AppConfig;
use metrics::{Unit, describe_counter};
use metrics_exporter_prometheus::PrometheusHandle;
use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
use sea_orm::ConnectionTrait;
use service::AppService;
use std::sync::Arc;
// Command-line options of the `email-worker` binary, parsed with clap.
// Plain `//` comments are used here on purpose: clap turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(name = "email-worker")]
#[command(version)]
struct Args {
// Log level passed to `init_tracing_subscriber` in `main`; defaults to "info".
#[arg(long, default_value = "info")]
log_level: String,
}
/// Serves the worker's auxiliary HTTP endpoints.
///
/// * `/health` runs `SELECT 1` on the writer and reader connections and
///   requests a cache connection. It answers 200 when everything succeeds and
///   503 otherwise, with a JSON body describing each component.
/// * `/metrics` returns the rendered Prometheus metrics.
/// * Any other path yields 404 with the body `not found`.
async fn http_handler(
    db: Arc<db::database::AppDatabase>,
    cache: Arc<db::cache::AppCache>,
    metrics: Arc<PrometheusHandle>,
    req: hyper::Request<hyper::Body>,
) -> Result<hyper::Response<hyper::Body>, std::convert::Infallible> {
    let response = match req.uri().path() {
        "/health" => {
            let writer_ok = db.writer().execute_unprepared("SELECT 1").await.is_ok();
            let reader_ok = db.reader().execute_unprepared("SELECT 1").await.is_ok();
            let db_ok = writer_ok && reader_ok;
            let cache_ok = cache.conn().await.is_ok();
            let healthy = db_ok && cache_ok;
            let flag = |up: bool| if up { "ok" } else { "error" };

            let report = serde_json::json!({
                "status": if healthy { "ok" } else { "unhealthy" },
                "db": flag(db_ok),
                "cache": flag(cache_ok),
            });
            match serde_json::to_string(&report) {
                Ok(json) => hyper::Response::builder()
                    .status(if healthy { 200 } else { 503 })
                    .header("content-type", "application/json")
                    .body(hyper::Body::from(json))
                    .expect("static response"),
                Err(e) => hyper::Response::builder()
                    .status(500)
                    .body(hyper::Body::from(format!("serialize error: {}", e)))
                    .expect("static response"),
            }
        }
        "/metrics" => hyper::Response::builder()
            .status(200)
            .header("content-type", "text/plain; version=0.0.4; charset=utf-8")
            .body(hyper::Body::from(metrics.render()))
            .unwrap(),
        _ => hyper::Response::builder()
            .status(404)
            .body(hyper::Body::from("not found"))
            .unwrap(),
    };
    Ok(response)
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let args = Args::parse();
let cfg = AppConfig::load();
init_tracing_subscriber(&args.log_level, false);
// Pre-register all email/queue metrics so they appear in /metrics even before first event.
describe_counter!(
"email_queued_total",
Unit::Count,
"Emails written to Redis stream"
);
describe_counter!(
"email_consumed_total",
Unit::Count,
"Emails consumed from queue"
);
describe_counter!(
"email_batch_size",
Unit::Count,
"Email consumer batch sizes accumulated"
);
describe_counter!(
"email_validation_skipped_total",
Unit::Count,
"Emails skipped due to invalid recipient"
);
describe_counter!(
"email_build_errors_total",
Unit::Count,
"Email message build failures"
);
describe_counter!(
"email_send_attempts_total",
Unit::Count,
"SMTP send attempts (including retries)"
);
describe_counter!("email_sent_total", Unit::Count, "Emails sent successfully");
describe_counter!(
"email_send_failures_total",
Unit::Count,
"Emails that failed after all retries"
);
let metrics_handle = Arc::new(install_recorder());
let http_metrics = Arc::new(HttpMetrics::new()); // Worker app — HTTP section will be empty
// Metrics pusher: periodically push all metrics to apps/metrics aggregator
if let Some(push_url) = std::env::var("METRICS_PUSH_URL").ok() {
let pusher = MetricsPusher::new(&push_url, "email");
pusher.spawn(
http_metrics.clone(),
metrics_handle.clone(),
std::time::Duration::from_secs(15),
);
tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
}
tracing::info!("Starting email worker");
let service = AppService::new(cfg).await?;
let db = Arc::new(service.db.clone());
let cache = Arc::new(service.cache.clone());
let (shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel::<()>(1);
tokio::spawn(async move {
tokio::signal::ctrl_c().await.ok();
tracing::info!("shutting down email worker");
let _ = shutdown_tx.send(());
});
// Start health/metrics server on a dedicated port
let health_db = db.clone();
let health_cache = cache.clone();
let health_metrics = metrics_handle.clone();
let health_addr: std::net::SocketAddr = ([0, 0, 0, 0], 8084).into();
let health_service = hyper::service::make_service_fn(move |_| {
let db = health_db.clone();
let cache = health_cache.clone();
let metrics = health_metrics.clone();
let service = hyper::service::service_fn(move |req| {
http_handler(db.clone(), cache.clone(), metrics.clone(), req)
});
async move { Ok::<_, std::convert::Infallible>(service) }
});
let health_server = hyper::Server::bind(&health_addr).serve(health_service);
tracing::info!(port = 8084, "health/metrics server started");
tokio::spawn(async move {
if let Err(e) = health_server.await {
tracing::error!("health check server error: {}", e);
}
});
service.start_email_workers(shutdown_rx).await?;
tracing::info!("email worker stopped");
Ok(())
}

View File

@ -1,35 +0,0 @@
[package]
name = "git-hook"
version.workspace = true
edition.workspace = true
authors.workspace = true
description.workspace = true
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[dependencies]
tokio = { workspace = true, features = ["full"] }
git = { workspace = true }
observability = { workspace = true }
db = { workspace = true }
config = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["json"] }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
tokio-util = { workspace = true }
hyper = { workspace = true }
serde_json = { workspace = true }
sea-orm = { workspace = true }
metrics = "0.22"
metrics-exporter-prometheus = "0.13"
chrono = { workspace = true, features = ["serde"] }
reqwest = { workspace = true }
agent = { workspace = true }
models = { workspace = true }
async-trait = { workspace = true }

View File

@ -1,10 +0,0 @@
use clap::Parser;
// Command-line arguments of the `git-hook` worker binary, parsed with clap.
// NOTE(review): the worker's `main` parses these into `_args` and does not
// read `worker_id` itself — confirm where the documented default is applied.
#[derive(Parser, Debug)]
#[command(name = "git-hook")]
#[command(version)]
pub struct HookArgs {
/// Worker ID for this instance. Defaults to the HOOK_POOL_WORKER_ID env var or a generated UUID.
#[arg(long)]
pub worker_id: Option<String>,
}

View File

@ -1,256 +0,0 @@
use clap::Parser;
use config::AppConfig;
use db::cache::AppCache;
use db::database::AppDatabase;
use git::hook::HookService;
use git::hook::embed::TagEmbedder;
use metrics::{Unit, describe_counter};
use metrics_exporter_prometheus::PrometheusHandle;
use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
use sea_orm::ConnectionTrait;
use std::sync::Arc;
use tokio::signal;
mod args;
use args::HookArgs;
/// Initialize EmbedService from config (graceful degradation).
async fn init_embed_service(
cfg: &AppConfig,
db: &AppDatabase,
) -> Result<agent::embed::EmbedService, Box<dyn std::error::Error + Send + Sync>> {
let client = agent::new_embed_client(cfg).await?;
let model_name = cfg
.get_embed_model_name()
.unwrap_or_else(|_| "text-embedding-3-small".into());
let dimensions = cfg.get_embed_model_dimensions().unwrap_or(1536);
let svc = agent::embed::EmbedService::new(client, db.writer().clone(), model_name, dimensions);
let _ = svc.ensure_collections().await;
tracing::info!("hook worker: EmbedService initialized for tag embedding");
Ok(svc)
}
/// Bridges agent's `EmbedService` to the `TagEmbedder` trait expected by the git crate.
struct EmbedServiceAdapter(agent::embed::EmbedService);

#[async_trait::async_trait]
impl TagEmbedder for EmbedServiceAdapter {
    /// Re-packs every `models::TagEmbedInput` into agent's `TagEmbedInput`
    /// (field by field) and forwards the batch to the wrapped service,
    /// boxing any error it reports.
    async fn embed_tags_batch(
        &self,
        tags: Vec<models::TagEmbedInput>,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        let mut converted: Vec<agent::embed::TagEmbedInput> = Vec::with_capacity(tags.len());
        for tag in tags {
            converted.push(agent::embed::TagEmbedInput {
                repo_id: tag.repo_id,
                repo_name: tag.repo_name,
                project_id: tag.project_id,
                name: tag.name,
                description: tag.description,
            });
        }

        match self.0.embed_tags_batch(converted).await {
            Ok(done) => Ok(done),
            Err(e) => Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>),
        }
    }
}
/// Handles requests to the worker's health/metrics port.
///
/// `/metrics` returns the rendered Prometheus metrics, `/health` reports the
/// state of the database (writer and reader) and the cache as JSON with
/// status 200 or 503, and every other path is answered with 404.
async fn http_handler(
    db: Arc<AppDatabase>,
    cache: Arc<AppCache>,
    metrics: Arc<PrometheusHandle>,
    req: hyper::Request<hyper::Body>,
) -> Result<hyper::Response<hyper::Body>, std::convert::Infallible> {
    let route = req.uri().path();

    if route == "/metrics" {
        return Ok(hyper::Response::builder()
            .status(200)
            .header("content-type", "text/plain; version=0.0.4; charset=utf-8")
            .body(hyper::Body::from(metrics.render()))
            .expect("static response"));
    }
    if route != "/health" {
        return Ok(hyper::Response::builder()
            .status(404)
            .body(hyper::Body::from("not found"))
            .expect("static response"));
    }

    // Probe order: writer, reader, then cache.
    let writer_ok = db.writer().execute_unprepared("SELECT 1").await.is_ok();
    let reader_ok = db.reader().execute_unprepared("SELECT 1").await.is_ok();
    let db_ok = writer_ok && reader_ok;
    let cache_ok = cache.conn().await.is_ok();

    let (code, overall) = if db_ok && cache_ok {
        (200, "ok")
    } else {
        (503, "unhealthy")
    };
    let report = serde_json::json!({
        "status": overall,
        "db": if db_ok { "ok" } else { "error" },
        "cache": if cache_ok { "ok" } else { "error" },
    });
    let payload = match serde_json::to_string(&report) {
        Ok(text) => text,
        Err(e) => {
            return Ok(hyper::Response::builder()
                .status(500)
                .body(hyper::Body::from(format!("serialize error: {}", e)))
                .expect("static response"));
        }
    };
    Ok(hyper::Response::builder()
        .status(code)
        .header("content-type", "application/json")
        .body(hyper::Body::from(payload))
        .expect("static response"))
}
/// Entry point of the git-hook worker.
///
/// Parses the CLI first (so `--help` / `--version` work without any backend),
/// then loads configuration, sets up tracing and metrics, connects to the
/// database and the cache, starts the hook service, serves `/health` and
/// `/metrics` on port 8083 and waits until the worker is cancelled by
/// SIGINT/SIGTERM.
///
/// # Errors
/// Returns an error when the database or the cache cannot be initialized.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // 1. Parse CLI args before touching configuration or any backend.
    let _args = HookArgs::parse();

    // 2. Load configuration and set up tracing.
    let cfg = AppConfig::load();
    let log_level = cfg.log_level().unwrap_or_else(|_| "info".to_string());
    init_tracing_subscriber(&log_level, false);

    // 3. Pre-register all hook metrics so they appear in /metrics even before first increment.
    describe_counter!("hook_tasks_total", Unit::Count, "Total hook tasks dequeued");
    describe_counter!(
        "hook_tasks_success_total",
        Unit::Count,
        "Hook tasks completed successfully"
    );
    describe_counter!(
        "hook_tasks_failed_total",
        Unit::Count,
        "Hook tasks that failed"
    );
    describe_counter!(
        "hook_tasks_locked_total",
        Unit::Count,
        "Hook tasks re-queued due to repo lock"
    );
    describe_counter!(
        "hook_tasks_retried_total",
        Unit::Count,
        "Hook tasks that entered retry"
    );
    describe_counter!(
        "hook_tasks_exhausted_total",
        Unit::Count,
        "Hook tasks that exhausted retries"
    );
    describe_counter!(
        "hook_sync_branches_changed_total",
        Unit::Count,
        "Branches changed during sync"
    );
    describe_counter!(
        "hook_sync_tags_changed_total",
        Unit::Count,
        "Tags changed during sync"
    );

    let metrics_handle = Arc::new(install_recorder());
    let http_metrics = Arc::new(HttpMetrics::new()); // Worker app — HTTP section will be empty

    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Ok(push_url) = std::env::var("METRICS_PUSH_URL") {
        let pusher = MetricsPusher::new(&push_url, "git-hook");
        pusher.spawn(
            http_metrics.clone(),
            metrics_handle.clone(),
            std::time::Duration::from_secs(15),
        );
        tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
    }

    // 4. Connect to the database.
    let db = Arc::new(AppDatabase::init(&cfg).await?);
    tracing::info!("database connected");

    // 5. Connect to Redis cache (also provides the cluster pool for hook queue)
    let cache = Arc::new(AppCache::init(&cfg).await?);
    tracing::info!("cache connected");

    tracing::info!("git-hook worker starting");

    // 6. Build and start git hook service
    let mut hooks = HookService::new(
        (*db).clone(),
        (*cache).clone(),
        cache.redis_pool().clone(),
        cfg.clone(),
    );
    // Optionally initialize tag embedding
    if let Ok(embed_svc) = init_embed_service(&cfg, &db).await {
        let adapter = EmbedServiceAdapter(embed_svc);
        hooks = hooks.with_tag_embedder(Arc::new(adapter));
    }
    let cancel = hooks.start_worker().await;
    let cancel_signal = cancel.clone();

    // 7. Start health/metrics server on a dedicated port
    let health_db = db.clone();
    let health_cache = cache.clone();
    let health_metrics = metrics_handle.clone();
    let health_addr: std::net::SocketAddr = ([0, 0, 0, 0], 8083).into();
    let health_service = hyper::service::make_service_fn(move |_| {
        let db = health_db.clone();
        let cache = health_cache.clone();
        let metrics = health_metrics.clone();
        let service = hyper::service::service_fn(move |req| {
            http_handler(db.clone(), cache.clone(), metrics.clone(), req)
        });
        async move { Ok::<_, std::convert::Infallible>(service) }
    });
    let health_server = hyper::Server::bind(&health_addr).serve(health_service);
    tracing::info!(port = 8083, "health/metrics server started");
    tokio::spawn(async move {
        if let Err(e) = health_server.await {
            tracing::error!("health check server error: {}", e);
        }
    });

    // 8. Spawn signal handler that cancels on SIGINT/SIGTERM
    tokio::spawn(async move {
        let ctrl_c = async {
            signal::ctrl_c()
                .await
                .expect("failed to install CTRL+C handler");
        };
        #[cfg(unix)]
        let term = async {
            use tokio::signal::unix::{SignalKind, signal};
            let mut sig =
                signal(SignalKind::terminate()).expect("failed to install SIGTERM handler");
            sig.recv().await;
        };
        #[cfg(not(unix))]
        let term = std::future::pending::<()>();
        tokio::select! {
            _ = ctrl_c => {
                tracing::info!("received SIGINT, initiating shutdown");
            }
            _ = term => {
                tracing::info!("received SIGTERM, initiating shutdown");
            }
        }
        cancel_signal.cancel();
    });

    // Wait until the worker is cancelled (by signal handler or otherwise)
    cancel.cancelled().await;
    tracing::info!("git-hook worker stopped");
    Ok(())
}

View File

@ -1,31 +0,0 @@
[package]
name = "gitserver"
version.workspace = true
edition.workspace = true
authors.workspace = true
description.workspace = true
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[[bin]]
name = "gitserver"
path = "src/main.rs"
[dependencies]
tokio = { workspace = true, features = ["full"] }
git = { workspace = true }
observability = { workspace = true }
tracing = { workspace = true }
db = { workspace = true }
config = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
chrono = { workspace = true, features = ["serde"] }
[lints]
workspace = true

View File

@ -1,59 +0,0 @@
use clap::Parser;
use config::AppConfig;
use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
use std::sync::Arc;
// Command-line options of the `gitserver` binary, parsed with clap.
// Plain `//` comments are used here on purpose: clap turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(name = "gitserver")]
#[command(version)]
struct Args {
// Log level passed to `init_tracing_subscriber` in `main`; defaults to "info".
#[arg(long, default_value = "info")]
log_level: String,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let args = Args::parse();
let cfg = AppConfig::load();
init_tracing_subscriber(&args.log_level, false);
let prometheus_handle = Arc::new(install_recorder());
let http_metrics = Arc::new(HttpMetrics::new());
// Metrics pusher: periodically push all metrics to apps/metrics aggregator
if let Some(push_url) = std::env::var("METRICS_PUSH_URL").ok() {
let pusher = MetricsPusher::new(&push_url, "gitserver");
pusher.spawn(
http_metrics.clone(),
prometheus_handle.clone(),
std::time::Duration::from_secs(15),
);
tracing::info!(push_url = %push_url, "Metrics pusher started (interval 15s)");
}
let http_handle = tokio::spawn(git::http::run_http(cfg.clone()));
let ssh_handle = tokio::spawn(git::ssh::run_ssh(cfg));
tokio::select! {
result = http_handle => {
match result {
Ok(Ok(())) => tracing::info!("HTTP server stopped"),
Ok(Err(e)) => tracing::error!("HTTP server error: {}", e),
Err(e) => tracing::error!("HTTP server task panicked: {}", e),
}
}
result = ssh_handle => {
match result {
Ok(Ok(())) => tracing::info!("SSH server stopped"),
Ok(Err(e)) => tracing::error!("SSH server error: {}", e),
Err(e) => tracing::error!("SSH server task panicked: {}", e),
}
}
_ = tokio::signal::ctrl_c() => {
tracing::info!("received shutdown signal");
}
}
tracing::info!("shutting down");
Ok(())
}

View File

@ -1,58 +0,0 @@
[package]
name = "metrics-aggregator"
version.workspace = true
edition.workspace = true
authors.workspace = true
description = "Unified observability aggregator: scrapes metrics, forwards traces, collects logs"
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[[bin]]
name = "metrics-aggregator"
path = "src/main.rs"
[dependencies]
tokio = { workspace = true, features = ["full"] }
config = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["env-filter", "json"] }
observability = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive", "env"] }
serde_json = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
# HTTP server
actix-web = "4.13.0"
actix-rt = "2.11.0"
# HTTP client for scraping (uses awc = actix-web client, no extra TLS deps)
awc = { workspace = true }
# HTTP client for Loki (reqwest is Send+Sync, unlike awc::Client)
reqwest = { workspace = true, features = ["json"] }
# Metrics
metrics = { workspace = true }
metrics-exporter-prometheus = { version = "0.18", default-features = false, features = ["http-listener", "tokio"] }
# Observability
opentelemetry = { workspace = true }
opentelemetry_sdk = { workspace = true }
opentelemetry-otlp = { version = "0.31.0", default-features = false, features = ["http-proto", "tokio", "trace", "tonic"] }
tracing-opentelemetry = "0.32.1"
tokio-util = { workspace = true }
tokio-stream = { workspace = true }
futures = { workspace = true }
url = { workspace = true }
tower = { workspace = true }
[lints]
workspace = true

View File

@ -1,35 +0,0 @@
use clap::Parser;
// Command-line / environment configuration of the metrics aggregator.
// Fields without an existing `///` doc are annotated with plain `//` comments
// so that the clap-generated help text stays unchanged.
#[derive(Parser, Debug)]
#[command(name = "metrics-aggregator")]
#[command(version)]
pub struct Args {
// Port the HTTP server binds to (on 0.0.0.0); env METRICS_AGGREGATOR_PORT, default 9090.
#[arg(long, default_value = "9090", env = "METRICS_AGGREGATOR_PORT")]
pub port: u16,
// OTLP endpoint used for trace export; tracing export is skipped when unset.
#[arg(long, env = "OTEL_EXPORTER_OTLP_ENDPOINT")]
pub otel_endpoint: Option<String>,
// Loki push URL; log forwarding is skipped when unset.
#[arg(long, env = "LOKI_URL")]
pub loki_url: Option<String>,
// Seconds between scrape rounds; env SCRAPE_INTERVAL_SECS, default 15.
#[arg(long, default_value = "15", env = "SCRAPE_INTERVAL_SECS")]
pub scrape_interval_secs: u64,
/// JSON file with scrape targets.
#[arg(long, env = "SCRAPE_TARGETS_FILE")]
pub targets_file: Option<String>,
// Log level passed to `init_tracing_subscriber`; env LOG_LEVEL, default "info".
#[arg(long, default_value = "info", env = "LOG_LEVEL")]
pub log_level: String,
/// Comma-separated list of app names to scrape.
#[arg(long, env = "SCRAPE_APPS")]
pub scrape_apps: Option<String>,
// Disables OTLP tracing even when an endpoint is configured.
#[arg(long)]
pub no_otel: bool,
// Disables Loki forwarding even when a URL is configured.
#[arg(long)]
pub no_loki: bool,
}

View File

@ -1,40 +0,0 @@
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::target::{ScrapeTarget, load_targets_from_file};
pub async fn watch_targets_file(
path: String,
targets: Arc<RwLock<Vec<ScrapeTarget>>>,
mut shutdown: tokio::sync::broadcast::Receiver<()>,
) {
let mtime_path = path;
let mut last_mtime: Option<std::time::SystemTime> = None;
loop {
tokio::select! {
_ = shutdown.recv() => break,
_ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
let metadata = match tokio::fs::metadata(&mtime_path).await {
Ok(m) => m,
Err(_) => continue,
};
let current_mtime = metadata.modified().ok();
if current_mtime != last_mtime {
last_mtime = current_mtime;
match load_targets_from_file(&mtime_path).await {
Ok(new_targets) => {
let mut guard = targets.write().await;
*guard = new_targets;
tracing::info!(path = %mtime_path, "targets file reloaded");
}
Err(e) => {
tracing::warn!(error = %e, "failed to reload targets file");
}
}
}
}
}
}
}

View File

@ -1,70 +0,0 @@
use std::time::Duration;
use awc::Client;
use crate::target::ScrapeTarget;
/// Discovers scrape targets by listing the pods of the namespace named in
/// `POD_NAMESPACE` through the in-cluster Kubernetes API.
///
/// Only pods in phase `Running` that have a pod IP and an annotations object
/// are returned. The scrape port and path come from the `metrics.port` and
/// `metrics.path` annotations, defaulting to `8080` and `/metrics`. String
/// pod labels are copied onto the target.
///
/// Returns `None` when the namespace variable or service-account token is
/// missing, the token is blank, the request fails, or the response is not a
/// pod list.
pub async fn k8s_pod_discovery() -> Option<Vec<ScrapeTarget>> {
    let pod_namespace = std::env::var("POD_NAMESPACE").ok()?;
    let token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token";
    let raw_token = tokio::fs::read_to_string(token_path).await.ok()?;
    // Surrounding whitespace (e.g. a trailing newline) would produce an
    // invalid Authorization header value, so strip it before use.
    let token = raw_token.trim();
    if token.is_empty() {
        return None;
    }

    let client = Client::builder()
        .timeout(Duration::from_secs(5))
        .add_default_header((
            awc::http::header::AUTHORIZATION.as_str(),
            format!("Bearer {}", token),
        ))
        .finish();
    let api_url = format!(
        "https://kubernetes.default.svc/api/v1/namespaces/{}/pods",
        pod_namespace
    );
    let mut response = client.get(api_url).send().await.ok()?;
    let body_bytes = response.body().await.ok()?;
    let pod_list: serde_json::Value = serde_json::from_slice(&body_bytes).ok()?;

    let targets: Vec<ScrapeTarget> = pod_list["items"]
        .as_array()?
        .iter()
        .filter_map(|pod| {
            let name = pod["metadata"]["name"].as_str()?.to_string();
            let phase = pod["status"]["phase"].as_str()?;
            if phase != "Running" {
                return None;
            }
            let pod_ip = pod["status"]["podIP"].as_str()?;
            // Pods without an annotations object are skipped entirely.
            let annotations = pod["metadata"]["annotations"].as_object()?;
            let port: u16 = annotations
                .get("metrics.port")
                .and_then(|v| v.as_str())
                .and_then(|s| s.parse().ok())
                .unwrap_or(8080);
            let path = annotations
                .get("metrics.path")
                .and_then(|v| v.as_str())
                .unwrap_or("/metrics");
            let labels = pod["metadata"]["labels"]
                .as_object()
                .map(|m| {
                    m.iter()
                        .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
                        .collect()
                })
                .unwrap_or_default();
            Some(ScrapeTarget {
                name,
                addr: format!("{}:{}", pod_ip, port),
                metrics_path: path.to_string(),
                labels,
            })
        })
        .collect();
    Some(targets)
}

View File

@ -1,70 +0,0 @@
use chrono::{DateTime, Utc};
use reqwest::Client;
use serde::Serialize;
use std::collections::HashMap;
#[derive(Clone)]
pub struct LokiForwarder {
url: String,
client: Client,
labels: HashMap<String, String>,
}
impl LokiForwarder {
pub fn new(url: String) -> Self {
Self {
url,
client: Client::builder()
.timeout(std::time::Duration::from_secs(5))
.build()
.expect("valid reqwest client"),
labels: HashMap::new(),
}
}
pub async fn push(&self, log_entries: Vec<LokiEntry>) -> anyhow::Result<()> {
if log_entries.is_empty() {
return Ok(());
}
let streams: Vec<LokiStream> = vec![LokiStream {
stream: self.labels.clone(),
values: log_entries
.into_iter()
.map(|e| (format!("{}", e.timestamp), e.line))
.collect(),
}];
let payload = LokiPayload { streams };
let resp = self
.client
.post(&self.url)
.header("Content-Type", "application/json")
.json(&payload)
.send()
.await;
match resp {
Ok(r) if r.status().is_success() => Ok(()),
Ok(r) => anyhow::bail!("Loki push failed: {}", r.status()),
Err(e) => anyhow::bail!("Loki push error: {}", e),
}
}
}
/// JSON body sent to the Loki push endpoint: a list of streams.
#[derive(Serialize)]
struct LokiPayload {
streams: Vec<LokiStream>,
}
/// One stream inside a push payload.
#[derive(Serialize)]
struct LokiStream {
/// Label set of the stream (copied from the forwarder's `labels`).
stream: HashMap<String, String>,
/// `(timestamp string, log line)` pairs, one per forwarded entry.
values: Vec<(String, String)>,
}
/// A single log line handed to `LokiForwarder::push`.
pub struct LokiEntry {
/// Time associated with the log line.
pub timestamp: DateTime<Utc>,
/// Text of the log line.
pub line: String,
}

View File

@ -1,633 +0,0 @@
//! Unified observability aggregator for in-cluster deployment.
//!
//! Collects metrics from all app pods via Prometheus scrape, forwards traces
//! to OTLP endpoint, and streams logs from all pods to Loki-compatible backend.
//!
//! Usage:
//! METRICS_AGGREGATOR_PORT=9090 \
//! OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317 \
//! LOKI_URL=http://loki:3100/loki/api/v1/push \
//! SCRAPE_INTERVAL_SECS=15 \
//! SCRAPE_TARGETS_FILE=/etc/metrics/targets.json \
//! metrics-aggregator
mod args;
mod hotreload;
mod k8s_discovery;
mod loki;
mod metrics;
mod otel;
mod scrape;
mod stats_store;
mod target;
use serde::Deserialize;
use std::collections::HashMap;
use std::fmt::Write as _;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use actix_web::{HttpResponse, HttpServer, web};
use clap::Parser;
use loki::{LokiEntry, LokiForwarder};
use metrics::AggMetrics;
use observability::{init_tracing_subscriber, install_recorder, instance_id};
use otel::OtelGuard;
use scrape::{HttpClient, ScrapeResult};
use stats_store::StatsStore;
use target::ScrapeTarget;
use tokio::io::AsyncBufReadExt;
use tokio::sync::{RwLock, broadcast};
use tokio::time::interval;
/// Shared map from a key (the scrape loop uses the target name) to the
/// metrics parsed from that target's most recent successful scrape.
type MetricsStore = Arc<RwLock<HashMap<String, Vec<scrape::PromMetric>>>>;
// StatsStore is defined in stats_store.rs — per-app aggregated data.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
let args = args::Args::parse();
init_tracing_subscriber(&args.log_level, false);
let instance = instance_id();
tracing::info!(
instance = %instance,
port = args.port,
scrape_interval = args.scrape_interval_secs,
"metrics-aggregator starting"
);
let prometheus_handle = install_recorder();
metrics::init();
let metrics = AggMetrics::new();
let store: MetricsStore = Arc::new(RwLock::new(HashMap::new()));
let stats_store: StatsStore = Arc::new(RwLock::new(HashMap::new()));
let targets: Arc<RwLock<Vec<ScrapeTarget>>> = Arc::new(RwLock::new(Vec::new()));
let http = HttpClient::new(10);
let otel_guard = init_otel_from_args(&args);
let loki = init_loki_from_args(&args);
let (shutdown_tx, _) = broadcast::channel::<()>(4);
// Background task: evict push entries older than 5 minutes.
let stats_store_for_evict = stats_store.clone();
let mut evict_shutdown = shutdown_tx.subscribe();
tokio::spawn(async move {
let mut ticker = interval(Duration::from_secs(30));
loop {
tokio::select! {
_ = evict_shutdown.recv() => break,
_ = ticker.tick() => {
let cutoff = chrono::Utc::now().timestamp() - 300;
let mut guard = stats_store_for_evict.write().await;
guard.retain(|_, entry| entry.last_seen >= cutoff);
}
}
}
});
if let Some(path) = &args.targets_file {
match target::load_targets_from_file(path).await {
Ok(initial_targets) => {
let mut guard = targets.write().await;
*guard = initial_targets;
tracing::info!(count = guard.len(), "loaded initial targets from file");
}
Err(e) => {
tracing::warn!(error = %e, "failed to load targets file");
}
}
let tw =
hotreload::watch_targets_file(path.clone(), targets.clone(), shutdown_tx.subscribe());
tokio::spawn(tw);
} else if std::env::var("KUBERNETES_SERVICE_HOST").is_ok() {
if let Some(k8s_targets) = k8s_discovery::k8s_pod_discovery().await {
let mut guard = targets.write().await;
*guard = k8s_targets.clone();
tracing::info!(count = guard.len(), "discovered K8s pods as targets");
}
}
let scrape_filter = args
.scrape_apps
.as_ref()
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect());
let scrape_targets = targets.clone();
let scrape_store = store.clone();
let scrape_metrics = metrics.clone();
let scrape_http = http.clone();
let loki_clone = loki.clone();
let shutdown_tx_clone = shutdown_tx.clone();
let scrape_interval = args.scrape_interval_secs;
let scrape_filter_clone = scrape_filter.clone();
tokio::task::spawn_local(async move {
scrape_loop(
scrape_targets,
scrape_store,
scrape_metrics,
scrape_http,
scrape_interval,
scrape_filter_clone,
loki_clone,
shutdown_tx_clone.subscribe(),
)
.await;
});
let log_shutdown = shutdown_tx.subscribe();
let log_loki = loki.clone();
tokio::task::spawn_local(async move {
log_collector(log_loki, log_shutdown).await;
});
let bind_addr: SocketAddr = ([0, 0, 0, 0], args.port).into();
tracing::info!(addr = %bind_addr, "HTTP server starting");
let app_targets = targets.clone();
let app_store = store.clone();
let app_handle = prometheus_handle.clone();
let loki_for_push: Option<Arc<LokiForwarder>> = loki.map(Arc::new);
let app_stats = stats_store.clone();
let server = HttpServer::new(move || {
let targets = app_targets.clone();
let store = app_store.clone();
let handle = app_handle.clone();
let stats_store = app_stats.clone();
let loki_for_push: Option<Arc<LokiForwarder>> = loki_for_push.clone();
actix_web::App::new()
.app_data(web::Data::new(targets))
.app_data(web::Data::new(store))
.app_data(web::Data::new(handle))
.app_data(web::Data::new(stats_store))
.app_data(web::Data::new(loki_for_push))
.route("/metrics", web::get().to(handle_metrics))
.route("/api/v1/metrics", web::get().to(handle_metrics))
.route("/api/v1/push", web::post().to(handle_push))
.route("/api/v1/dashboard", web::get().to(handle_dashboard))
.route("/api/v1/stats", web::get().to(handle_stats))
.route("/health", web::get().to(handle_health))
.route("/api/v1/health", web::get().to(handle_health))
.route("/api/v1/targets", web::get().to(handle_targets))
})
.bind(&bind_addr)?
.run();
let server_handle = server.handle();
tokio::spawn(server);
tokio::signal::ctrl_c().await.ok();
tracing::info!("received Ctrl+C, shutting down");
let _ = shutdown_tx.send(());
server_handle.stop(true).await;
if let Some(guard) = otel_guard {
guard.shutdown().await;
}
tracing::info!("metrics-aggregator stopped");
Ok(())
}
/// Sets up OTLP trace export unless `--no-otel` was given.
///
/// The endpoint comes from the parsed arguments, falling back to the
/// `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable. Returns `None` when
/// tracing is disabled, no endpoint is available, or initialization fails
/// (the failure is logged as a warning).
fn init_otel_from_args(args: &args::Args) -> Option<OtelGuard> {
    if args.no_otel {
        return None;
    }
    let endpoint = match args.otel_endpoint.as_ref() {
        Some(configured) => configured.clone(),
        None => std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok()?,
    };

    let guard = otel::init_otel(&endpoint, "metrics-aggregator")
        .map_err(|e| {
            tracing::warn!(error = %e, "OTLP init failed, continuing without traces");
        })
        .ok()?;
    tracing::info!(endpoint = %endpoint, "OTLP tracing enabled");
    Some(guard)
}
/// Builds the Loki forwarder unless `--no-loki` was given.
///
/// The push URL comes from the parsed arguments, falling back to the
/// `LOKI_URL` environment variable; `None` is returned when neither is set.
fn init_loki_from_args(args: &args::Args) -> Option<LokiForwarder> {
    if args.no_loki {
        return None;
    }
    let url = match args.loki_url.as_ref() {
        Some(configured) => configured.clone(),
        None => std::env::var("LOKI_URL").ok()?,
    };
    tracing::info!("Loki log forwarding enabled");
    Some(LokiForwarder::new(url))
}
/// Serves the combined Prometheus exposition: the aggregator's own recorder
/// output, followed by the scraped metrics, followed by the pushed metrics.
async fn handle_metrics(
    store: web::Data<MetricsStore>,
    stats_store: web::Data<StatsStore>,
    handle: web::Data<observability::PrometheusHandle>,
) -> HttpResponse {
    // Labels stamped onto every scraped series.
    let group_labels = vec![(
        String::from("aggregator_instance"),
        String::from("default"),
    )];
    let scraped = render_aggregated_metrics(store, group_labels).await;
    let pushed = render_pushed_metrics(stats_store).await;
    let own = handle.render();

    let mut response = HttpResponse::Ok();
    response.content_type("text/plain; version=0.0.4; charset=utf-8");
    response.body(format!("{}{}{}", own, scraped, pushed))
}
/// Liveness endpoint: always answers 200 with a fixed JSON body.
async fn handle_health() -> HttpResponse {
    const BODY: &str = r#"{"status":"ok"}"#;
    let mut response = HttpResponse::Ok();
    response.content_type("application/json");
    response.body(BODY)
}
/// Returns the current scrape targets as JSON.
///
/// If serialization fails the body is left empty; the status is 200 either way.
async fn handle_targets(targets: web::Data<Arc<RwLock<Vec<ScrapeTarget>>>>) -> HttpResponse {
    let snapshot = targets.read().await;
    let body = match serde_json::to_string(&*snapshot) {
        Ok(json) => json,
        Err(_) => String::default(),
    };
    let mut response = HttpResponse::Ok();
    response.content_type("application/json");
    response.body(body)
}
// ── Push endpoint payload ────────────────────────────────────────────────────
/// JSON body accepted by the push endpoint (`handle_push`).
///
/// Field names are camelCase on the wire (e.g. `tokenUsage`). `app` and
/// `timestamp` are required; every other section falls back to its default
/// (empty / `None`) when absent.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct PushPayload {
// Passed to `merge_push_payload` as the app identifier.
app: String,
// Passed to `merge_push_payload` alongside `app`; empty string when omitted.
#[serde(default)]
instance: String,
// NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm against the sender.
timestamp: i64,
#[serde(default)]
http: Option<observability::push::HttpPayload>,
#[serde(default)]
system: Option<observability::push::SystemPayload>,
#[serde(default)]
business: HashMap<String, f64>,
#[serde(default)]
token_usage: Option<observability::push::TokenUsagePayload>,
#[serde(default)]
tasks: Option<observability::push::TaskStatsPayload>,
#[serde(default)]
latency: HashMap<String, observability::push::LatencySnapshot>,
// Log entries; also forwarded to Loki by `handle_push` when a forwarder is configured.
#[serde(default)]
logs: Vec<observability::push::LogEntry>,
}
/// Accepts a pushed snapshot: merges it into the stats store and, when a Loki
/// forwarder is configured and the payload has logs, forwards those logs.
///
/// Always answers 200 with the body `ok`; a failed Loki push is only logged.
async fn handle_push(
    stats_store: web::Data<StatsStore>,
    loki: web::Data<Option<Arc<LokiForwarder>>>,
    payload: web::Json<PushPayload>,
) -> HttpResponse {
    stats_store::merge_push_payload(
        &stats_store,
        &payload.app,
        &payload.instance,
        payload.timestamp,
        payload.http.as_ref(),
        payload.system.as_ref(),
        &payload.business,
        payload.token_usage.as_ref(),
        payload.tasks.as_ref(),
        &payload.latency,
        &payload.logs,
    )
    .await;

    // Forward logs to Loki if configured
    if let Some(loki_fwd) = loki.as_ref() {
        if !payload.logs.is_empty() {
            let mut entries: Vec<LokiEntry> = Vec::with_capacity(payload.logs.len());
            for log in payload.logs.iter() {
                // Fall back to the current time when the timestamp is out of range.
                let timestamp = chrono::DateTime::from_timestamp(log.timestamp, 0)
                    .unwrap_or_else(chrono::Utc::now);
                let line = format!("[{}] {}", log.level.to_lowercase(), log.message);
                entries.push(LokiEntry { timestamp, line });
            }
            if let Err(e) = loki_fwd.push(entries).await {
                tracing::warn!(error = %e, "loki push on /push failed");
            }
        }
    }

    HttpResponse::Ok().body("ok")
}
/// Periodically scrapes every configured target until `shutdown` yields.
///
/// On each tick the current target list is snapshotted, targets not named in
/// `scrape_apps_filter` (when a filter is given) are skipped, and the rest are
/// scraped one after another. Successful scrapes replace the target's entry
/// in `store`; failures are counted and logged. The `targets_total` gauge
/// reflects the unfiltered target count, `targets_healthy` the number of
/// successful scrapes in the round.
///
/// `interval_secs` is clamped to at least one second because
/// `tokio::time::interval` panics on a zero period. `_loki` is accepted for
/// interface compatibility and is not used.
async fn scrape_loop(
    targets: Arc<RwLock<Vec<ScrapeTarget>>>,
    store: MetricsStore,
    metrics: AggMetrics,
    http: HttpClient,
    interval_secs: u64,
    scrape_apps_filter: Option<Vec<String>>,
    _loki: Option<LokiForwarder>,
    mut shutdown: broadcast::Receiver<()>,
) {
    // A zero period would make `interval` panic (e.g. SCRAPE_INTERVAL_SECS=0).
    let mut ticker = interval(Duration::from_secs(interval_secs.max(1)));
    loop {
        tokio::select! {
            _ = shutdown.recv() => break,
            _ = ticker.tick() => {
                // Work on a snapshot so the lock is not held while scraping.
                let targets_snapshot = targets.read().await.clone();
                let count = targets_snapshot.len() as u64;
                metrics.targets_total.set(count as f64);
                let mut healthy_count = 0u64;
                for target in &targets_snapshot {
                    if let Some(ref filter) = scrape_apps_filter {
                        if !filter.contains(&target.name) {
                            continue;
                        }
                    }
                    metrics.scrape_total.increment(1);
                    match http.scrape(target).await {
                        ScrapeResult::Success(body, duration_ms) => {
                            metrics.scrape_success.increment(1);
                            metrics.scrape_duration.record(duration_ms);
                            let parsed = scrape::parse_prometheus(&body);
                            update_store(store.clone(), &target.name, parsed).await;
                            healthy_count += 1;
                        }
                        ScrapeResult::Timeout => {
                            metrics.scrape_failures.increment(1);
                            metrics.scrape_errors_timeout.increment(1);
                            tracing::warn!(target = %target.name, "scrape timeout");
                        }
                        ScrapeResult::ConnectionError(e) => {
                            metrics.scrape_failures.increment(1);
                            metrics.scrape_errors_connection.increment(1);
                            tracing::warn!(target = %target.name, error = %e, "scrape connection error");
                        }
                        ScrapeResult::HttpError(status) => {
                            metrics.scrape_failures.increment(1);
                            tracing::warn!(target = %target.name, status = status, "scrape HTTP error");
                        }
                    }
                }
                metrics.targets_healthy.set(healthy_count as f64);
            }
        }
    }
}
/// Replaces the stored samples of `target_name` with `metrics`.
async fn update_store(store: MetricsStore, target_name: &str, metrics: Vec<scrape::PromMetric>) {
    let key = target_name.to_string();
    store.write().await.insert(key, metrics);
}
/// Renders every scraped sample as Prometheus text exposition lines.
///
/// Each sample keeps its own labels and additionally receives
/// `aggregated_by="metrics-aggregator"`, `source_target=<target name>` and
/// every pair from `extra_group_labels` (later inserts override earlier
/// labels with the same key).
///
/// Label values are escaped for backslash, double quote and line feed, and
/// the labels of a sample are emitted in sorted order so the output is stable
/// between calls.
async fn render_aggregated_metrics(
    store: web::Data<MetricsStore>,
    extra_group_labels: Vec<(String, String)>,
) -> String {
    /// Escapes a label value for the text exposition format.
    fn escape_label_value(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                // A raw line break would split the sample across two lines.
                '\n' => escaped.push_str("\\n"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    let guard = store.read().await;
    let mut output = String::new();
    for (target_name, metrics) in guard.iter() {
        for metric in metrics {
            let mut labels = metric.labels.clone();
            labels.insert(
                "aggregated_by".to_string(),
                "metrics-aggregator".to_string(),
            );
            labels.insert("source_target".to_string(), target_name.clone());
            for (k, v) in &extra_group_labels {
                labels.insert(k.clone(), v.clone());
            }

            // `labels` always holds at least the two entries inserted above,
            // so a label block is emitted unconditionally.
            let mut pairs: Vec<String> = labels
                .iter()
                .map(|(k, v)| format!(r#"{}="{}""#, k, escape_label_value(v)))
                .collect();
            pairs.sort_unstable();

            let _ = writeln!(
                &mut output,
                "{}{{{}}} {}",
                metric.name,
                pairs.join(","),
                metric.value
            );
        }
    }
    output
}
/// Renders the stats received through the push endpoint as Prometheus text
/// exposition lines, one group of series per application.
///
/// Emitted, in order, for every app: HTTP totals (`push_http_*`), per-endpoint
/// request counts, system gauges, business counters, AI token counters and
/// per-endpoint latency figures.
///
/// The HTTP totals carry `app`, `aggregated_by` and `push_source` as proper
/// `name="value"` pairs; every label value is escaped.
async fn render_pushed_metrics(stats_store: web::Data<StatsStore>) -> String {
    /// Escapes a label value for the text exposition format.
    fn escape_label_value(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                '\n' => escaped.push_str("\\n"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    let guard = stats_store.read().await;
    let mut output = String::new();
    for (app_name, entry) in guard.iter() {
        let app = escape_label_value(app_name);
        let h = entry;

        // HTTP totals. The label set must be a comma-separated list of
        // `name="value"` pairs; bare words are not valid exposition syntax.
        let label_str = format!(
            r#"{{app="{}",aggregated_by="metrics-aggregator",push_source="true"}}"#,
            app
        );
        let http_totals = [
            ("push_http_requests_total", h.requests_total),
            (
                "push_http_request_duration_ms_total",
                h.request_duration_ms_total,
            ),
            ("push_http_requests_2xx", h.requests_2xx),
            ("push_http_requests_4xx", h.requests_4xx),
            ("push_http_requests_5xx", h.requests_5xx),
        ];
        for (metric, value) in http_totals {
            let _ = writeln!(&mut output, "{}{} {}", metric, label_str, value);
        }

        // Per-endpoint request counts; spaces and slashes become underscores.
        for (endpoint, &count) in &h.endpoints {
            let sanitized = endpoint.replace([' ', '/'], "_").to_lowercase();
            let ep_labels = format!(
                r#"app="{}",endpoint="{}",aggregated_by="metrics-aggregator",push_source="true""#,
                app,
                escape_label_value(&sanitized)
            );
            let _ = writeln!(
                &mut output,
                "push_http_endpoint_requests_total{{{}}} {}",
                ep_labels, count
            );
        }

        // System metrics in Prometheus format
        let sys_labels = format!(r#"app="{}",aggregated_by="metrics-aggregator""#, app);
        let _ = writeln!(
            &mut output,
            "system_cpu_usage_percent{{{}}} {}",
            sys_labels, h.cpu_usage_percent
        );
        let _ = writeln!(
            &mut output,
            "system_memory_used_mb{{{}}} {}",
            sys_labels, h.memory_used_mb
        );
        let _ = writeln!(
            &mut output,
            "system_memory_total_mb{{{}}} {}",
            sys_labels, h.memory_total_mb
        );
        let _ = writeln!(
            &mut output,
            "system_uptime_secs{{{}}} {}",
            sys_labels, h.uptime_secs
        );

        // Business counters.
        // NOTE(review): the counter name is used verbatim as the metric name;
        // confirm that pushers only send valid metric identifiers.
        for (counter_name, value) in &h.business {
            let biz_labels = format!(r#"app="{}",aggregated_by="metrics-aggregator""#, app);
            let _ = writeln!(&mut output, "{}{{{}}} {}", counter_name, biz_labels, value);
        }

        // Token usage
        let ai_labels = format!(r#"app="{}",aggregated_by="metrics-aggregator""#, app);
        let ai_totals = [
            ("ai_input_tokens_total", h.ai_input_tokens_total),
            ("ai_output_tokens_total", h.ai_output_tokens_total),
            ("ai_calls_total", h.ai_calls_total),
        ];
        for (metric, value) in ai_totals {
            let _ = writeln!(&mut output, "{}{{{}}} {}", metric, ai_labels, value);
        }

        // Latency per endpoint
        for (endpoint, lat) in &h.latency {
            let lat_labels = format!(
                r#"app="{}",endpoint="{}",aggregated_by="metrics-aggregator""#,
                app,
                escape_label_value(endpoint)
            );
            let _ = writeln!(
                &mut output,
                "latency_p99_ms{{{}}} {}",
                lat_labels, lat.p99_ms
            );
            let _ = writeln!(
                &mut output,
                "latency_p90_ms{{{}}} {}",
                lat_labels, lat.p90_ms
            );
            let _ = writeln!(
                &mut output,
                "latency_p50_ms{{{}}} {}",
                lat_labels, lat.p50_ms
            );
            let _ = writeln!(
                &mut output,
                "latency_max_ms{{{}}} {}",
                lat_labels, lat.max_ms
            );
        }
    }
    output
}
// ── JSON API handlers ────────────────────────────────────────────────────────
/// Serves the combined dashboard snapshot as JSON.
///
/// If serialization fails the body is empty; the status stays `200`.
async fn handle_dashboard(stats_store: web::Data<StatsStore>) -> HttpResponse {
    let snapshot = stats_store::build_dashboard(&stats_store).await;
    let body = match serde_json::to_string(&snapshot) {
        Ok(json) => json,
        Err(_) => String::new(),
    };
    let mut response = HttpResponse::Ok();
    response.content_type("application/json");
    response.body(body)
}
/// Serves the raw per-app stats map as JSON.
///
/// If serialization fails the body is empty; the status stays `200`.
async fn handle_stats(stats_store: web::Data<StatsStore>) -> HttpResponse {
    let guard = stats_store.read().await;
    let body = match serde_json::to_string(&*guard) {
        Ok(json) => json,
        Err(_) => String::new(),
    };
    let mut response = HttpResponse::Ok();
    response.content_type("application/json");
    response.body(body)
}
async fn log_collector(loki: Option<LokiForwarder>, mut shutdown: broadcast::Receiver<()>) {
let stdin = tokio::io::stdin();
let mut reader = tokio::io::BufReader::new(stdin);
let mut interval_tick = interval(Duration::from_secs(1));
let mut batch: Vec<LokiEntry> = Vec::with_capacity(100);
let mut line_buf = String::new();
loop {
tokio::select! {
_ = shutdown.recv() => break,
_ = interval_tick.tick() => {
if !batch.is_empty() {
if let Some(ref loki) = loki {
if let Err(e) = loki.push(std::mem::take(&mut batch)).await {
tracing::warn!(error = %e, "Loki push failed");
}
}
}
}
_ = async { line_buf.clear(); reader.read_line(&mut line_buf).await.ok() } => {
if !line_buf.is_empty() {
let line = line_buf.trim_end().to_string();
if !line.is_empty() {
batch.push(LokiEntry {
timestamp: chrono::Utc::now(),
line,
});
if batch.len() >= 100 {
if let Some(ref loki) = loki {
if let Err(e) = loki.push(std::mem::take(&mut batch)).await {
tracing::warn!(error = %e, "Loki push failed");
}
}
}
}
}
}
}
}
}

View File

@ -1,101 +0,0 @@
use metrics::{
Counter, Gauge, Histogram, Unit, describe_counter, describe_gauge, describe_histogram,
};
/// Registers unit and help text for every metric the aggregator exposes
/// about itself.
pub fn init() {
    // Gauges: current state of the target set.
    let gauges = [
        (
            "aggregator_targets_total",
            "Total number of scrape targets known to the aggregator",
        ),
        (
            "aggregator_targets_healthy",
            "Number of scrape targets that responded last scrape",
        ),
    ];
    for (name, help) in gauges {
        describe_gauge!(name, Unit::Count, help);
    }

    // Counters: monotonically increasing event counts.
    let counters = [
        ("aggregator_scrape_total", "Total number of scrape attempts"),
        ("aggregator_scrape_success", "Successful scrapes"),
        ("aggregator_scrape_failures", "Failed scrape attempts"),
        (
            "aggregator_scrape_errors_parse",
            "Scrape failures due to parse errors",
        ),
        (
            "aggregator_scrape_errors_timeout",
            "Scrape failures due to timeout",
        ),
        (
            "aggregator_scrape_errors_connection",
            "Scrape failures due to connection errors",
        ),
        ("aggregator_targets_discovered", "Total targets discovered"),
        ("aggregator_targets_lost", "Total targets that disappeared"),
    ];
    for (name, help) in counters {
        describe_counter!(name, Unit::Count, help);
    }

    // The only histogram: how long a scrape took.
    describe_histogram!(
        "aggregator_scrape_duration_ms",
        Unit::Milliseconds,
        "Scrape duration in milliseconds"
    );
}
/// Handles to the aggregator's self-monitoring metrics.
///
/// Every field is a handle for the metric named `aggregator_<field>`, except
/// `scrape_duration`, which maps to `aggregator_scrape_duration_ms`.
#[derive(Clone)]
// NOTE(review): presumably allowed because some handles (e.g. the parse-error
// and discovery counters) are not updated anywhere yet — confirm.
#[allow(dead_code)]
pub struct AggMetrics {
/// Gauge: number of known scrape targets.
pub targets_total: Gauge,
/// Gauge: number of targets that answered the last scrape round.
pub targets_healthy: Gauge,
/// Counter: scrape attempts.
pub scrape_total: Counter,
/// Counter: successful scrapes.
pub scrape_success: Counter,
/// Counter: failed scrapes of any kind.
pub scrape_failures: Counter,
/// Counter: failures caused by parse errors.
pub scrape_errors_parse: Counter,
/// Counter: failures caused by timeouts.
pub scrape_errors_timeout: Counter,
/// Counter: failures caused by connection errors.
pub scrape_errors_connection: Counter,
/// Counter: targets discovered.
pub targets_discovered: Counter,
/// Counter: targets that disappeared.
pub targets_lost: Counter,
/// Histogram: scrape duration in milliseconds.
pub scrape_duration: Histogram,
}
impl Default for AggMetrics {
    /// Obtains a handle for each aggregator metric from the `metrics` macros.
    fn default() -> Self {
        let targets_total = metrics::gauge!("aggregator_targets_total");
        let targets_healthy = metrics::gauge!("aggregator_targets_healthy");
        let scrape_total = metrics::counter!("aggregator_scrape_total");
        let scrape_success = metrics::counter!("aggregator_scrape_success");
        let scrape_failures = metrics::counter!("aggregator_scrape_failures");
        let scrape_errors_parse = metrics::counter!("aggregator_scrape_errors_parse");
        let scrape_errors_timeout = metrics::counter!("aggregator_scrape_errors_timeout");
        let scrape_errors_connection = metrics::counter!("aggregator_scrape_errors_connection");
        let targets_discovered = metrics::counter!("aggregator_targets_discovered");
        let targets_lost = metrics::counter!("aggregator_targets_lost");
        let scrape_duration = metrics::histogram!("aggregator_scrape_duration_ms");

        Self {
            targets_total,
            targets_healthy,
            scrape_total,
            scrape_success,
            scrape_failures,
            scrape_errors_parse,
            scrape_errors_timeout,
            scrape_errors_connection,
            targets_discovered,
            targets_lost,
            scrape_duration,
        }
    }
}
impl AggMetrics {
    /// Creates the metric handles; identical to [`Default::default`].
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}

View File

@ -1,42 +0,0 @@
use anyhow::Context;
use opentelemetry::trace::TracerProvider;
use opentelemetry_otlp::{SpanExporter, WithExportConfig};
use opentelemetry_sdk::trace as sdktrace;
use tracing_opentelemetry::layer;
use tracing_subscriber::prelude::*;
pub struct OtelGuard {
provider: sdktrace::SdkTracerProvider,
}
impl OtelGuard {
pub async fn shutdown(self) {
if let Err(e) = self.provider.shutdown() {
tracing::warn!(error = %e, "OTLP shutdown error");
}
}
}
/// Builds an OTLP/HTTP span exporter for `endpoint`, wires it into a batching
/// tracer provider and installs a tracing subscriber that exports through it.
///
/// `service_name` is used as the tracer name.
///
/// # Errors
/// Fails when the exporter cannot be built or when a global subscriber is
/// already installed.
pub fn init_otel(endpoint: &str, service_name: &str) -> anyhow::Result<OtelGuard> {
    let span_exporter = SpanExporter::builder()
        .with_http()
        .with_endpoint(endpoint)
        .build()
        .context("build OTLP exporter")?;

    let provider = sdktrace::SdkTracerProvider::builder()
        .with_batch_exporter(span_exporter)
        .build();

    let otel_layer = layer().with_tracer(provider.tracer(service_name.to_string()));
    tracing_subscriber::registry()
        .with(otel_layer)
        .try_init()
        .context("install OTLP tracing subscriber")?;

    Ok(OtelGuard { provider })
}

View File

@ -1,135 +0,0 @@
use awc::Client;
use std::collections::HashMap;
use crate::target::ScrapeTarget;
/// Thin wrapper around an `awc` client used to scrape metric endpoints.
#[derive(Clone)]
pub struct HttpClient {
    client: Client,
}

impl HttpClient {
    /// Creates a client whose requests time out after `timeout_secs` seconds.
    pub fn new(timeout_secs: u64) -> Self {
        let client = Client::builder()
            .timeout(std::time::Duration::from_secs(timeout_secs))
            .finish();
        Self { client }
    }

    /// Fetches `target.url()` and classifies the outcome.
    ///
    /// Returns [`ScrapeResult::Success`] with the body (lossily decoded as
    /// UTF-8) and the elapsed time in milliseconds, [`ScrapeResult::HttpError`]
    /// for a non-2xx status, [`ScrapeResult::Timeout`] when the request error
    /// text mentions a timeout, and [`ScrapeResult::ConnectionError`] for every
    /// other failure, including a failure to read the body.
    pub async fn scrape(&self, target: &ScrapeTarget) -> ScrapeResult {
        let start = std::time::Instant::now();
        let url = target.url();
        let mut resp = match self.client.get(url).send().await {
            Ok(resp) => resp,
            Err(e) => {
                let msg = e.to_string();
                // Compare case-insensitively: error texts that start with a
                // capital letter ("Timeout while ...") must count as timeouts too.
                let lowered = msg.to_lowercase();
                let is_timeout = ["timeout", "timedout", "timed out"]
                    .iter()
                    .any(|needle| lowered.contains(needle));
                if is_timeout {
                    return ScrapeResult::Timeout;
                }
                return ScrapeResult::ConnectionError(msg);
            }
        };
        if !resp.status().is_success() {
            return ScrapeResult::HttpError(resp.status().as_u16());
        }
        let body = match resp.body().await {
            Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
            Err(e) => return ScrapeResult::ConnectionError(e.to_string()),
        };
        let scrape_ms = start.elapsed().as_millis() as f64;
        ScrapeResult::Success(body, scrape_ms)
    }
}
/// Outcome of one scrape attempt, as produced by `HttpClient::scrape`.
pub enum ScrapeResult {
/// Response body and the elapsed time of the scrape in milliseconds.
Success(String, f64),
/// The request failed with an error classified as a timeout.
Timeout,
/// The request or the body read failed; carries the error text.
ConnectionError(String),
/// The target answered with a non-success status code.
HttpError(u16),
}
/// One sample parsed from a Prometheus text exposition body.
#[derive(Clone, Debug)]
pub struct PromMetric {
    /// Metric name, i.e. everything before the label block or the value.
    pub name: String,
    /// Sample value.
    pub value: f64,
    /// Labels of the sample; empty when the line had no label block.
    pub labels: HashMap<String, String>,
}

/// Parses a Prometheus text exposition body into samples.
///
/// Blank lines, comment lines (`#`) and lines that cannot be parsed are
/// skipped. A trailing timestamp after the value is ignored.
///
/// The label block is located by scanning for its closing brace outside of
/// quotes, so label values may contain spaces, commas and braces, and
/// malformed lines (unbalanced braces, unterminated quotes) are skipped
/// instead of causing a panic.
pub fn parse_prometheus(body: &str) -> Vec<PromMetric> {
    body.lines().filter_map(parse_sample_line).collect()
}

/// Parses a single exposition line; `None` for anything that is not a sample.
fn parse_sample_line(line: &str) -> Option<PromMetric> {
    let line = line.trim();
    if line.is_empty() || line.starts_with('#') {
        return None;
    }

    // The name ends at the label block or at the first whitespace.
    let name_end = line.find(|c: char| c == '{' || c.is_whitespace())?;
    let name = &line[..name_end];
    if name.is_empty() {
        return None;
    }

    let rest = &line[name_end..];
    let (labels, value_part) = match rest.strip_prefix('{') {
        Some(after_brace) => {
            let close = find_label_block_end(after_brace)?;
            (
                parse_labels(&after_brace[..close]),
                // '}' is one byte, so `close + 1` is a valid boundary.
                &after_brace[close + 1..],
            )
        }
        None => (HashMap::new(), rest),
    };

    let value: f64 = value_part.split_whitespace().next()?.parse().ok()?;
    Some(PromMetric {
        name: name.to_string(),
        value,
        labels,
    })
}

/// Returns the byte offset of the `}` that closes a label block, ignoring
/// braces inside quoted values and honouring backslash escapes.
fn find_label_block_end(s: &str) -> Option<usize> {
    let mut quote: Option<char> = None;
    let mut escaped = false;
    for (idx, ch) in s.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match (quote, ch) {
            (Some(_), '\\') => escaped = true,
            (Some(q), c) if c == q => quote = None,
            (None, '"') | (None, '\'') => quote = Some(ch),
            (None, '}') => return Some(idx),
            _ => {}
        }
    }
    None
}

/// Parses the inside of a label block (`k1="v1",k2="v2"`) into a map.
///
/// Values may be double-quoted, single-quoted or bare (alphanumerics, `_`
/// and `-`). Inside quotes, `\\` and an escaped quote are decoded; any other
/// escape sequence, including `\n`, is kept verbatim so that a value never
/// contains a raw line break. An unterminated quote takes the rest of the
/// input as the value. Parsing stops at the first segment without `=`.
pub fn parse_labels(s: &str) -> HashMap<String, String> {
    let mut labels = HashMap::new();
    let mut remaining = s;
    while let Some(eq) = remaining.find('=') {
        let key = remaining[..eq].trim().to_string();
        let after_eq = &remaining[eq + 1..];
        let (value, rest) = match after_eq.chars().next() {
            Some(q @ ('"' | '\'')) => split_quoted(&after_eq[1..], q),
            _ => {
                let end = after_eq
                    .find(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
                    .unwrap_or(after_eq.len());
                (after_eq[..end].to_string(), &after_eq[end..])
            }
        };
        labels.insert(key, value);
        remaining = rest.trim_start_matches(',').trim_start();
    }
    labels
}

/// Splits `body` (the text after an opening quote) into the decoded value and
/// the text following the closing quote.
fn split_quoted(body: &str, quote: char) -> (String, &str) {
    let mut value = String::with_capacity(body.len());
    let mut chars = body.char_indices();
    while let Some((idx, ch)) = chars.next() {
        if ch == quote {
            return (value, &body[idx + ch.len_utf8()..]);
        }
        if ch != '\\' {
            value.push(ch);
            continue;
        }
        match chars.next() {
            Some((_, '\\')) => value.push('\\'),
            Some((_, c)) if c == quote => value.push(c),
            Some((_, other)) => {
                value.push('\\');
                value.push(other);
            }
            None => value.push('\\'),
        }
    }
    // No closing quote: everything read so far is the value.
    (value, "")
}

View File

@ -1,217 +0,0 @@
//! Stats store: receives expanded push payloads from all apps,
//! aggregates over time, computes derived statistics (p99 etc),
//! and provides JSON API for external consumption.
use serde::Serialize;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
/// Aggregated stats entry of one application.
///
/// The store is keyed by app name only, so pushes from several instances of
/// the same app update the same entry.
#[derive(Debug, Clone, Default, Serialize)]
pub struct AppStats {
/// Timestamp taken from the most recent push.
pub last_seen: i64,
/// Number of push samples received.
pub sample_count: u64,
// ── HTTP ─────────────────────────────────────────────────────
pub requests_total: u64,
pub request_duration_ms_total: u64,
pub requests_2xx: u64,
pub requests_4xx: u64,
pub requests_5xx: u64,
/// Request count per endpoint.
pub endpoints: HashMap<String, u64>,
// ── System ───────────────────────────────────────────────────
pub cpu_usage_percent: f32,
pub memory_used_mb: u64,
pub memory_total_mb: u64,
pub uptime_secs: u64,
// ── Business counters ────────────────────────────────────────
/// Business counters by name, as sent in the most recent push.
pub business: HashMap<String, f64>,
// ── Token usage ──────────────────────────────────────────────
pub ai_input_tokens_total: i64,
pub ai_output_tokens_total: i64,
pub ai_calls_total: i64,
pub ai_calls_success: i64,
pub ai_calls_failure: i64,
/// Token usage per model name.
pub token_by_model: HashMap<String, ModelTokenStats>,
// ── Tasks ────────────────────────────────────────────────────
pub tasks_queued: i64,
pub tasks_running: i64,
pub tasks_completed: i64,
pub tasks_failed: i64,
// ── Latency ──────────────────────────────────────────────────
/// Latency figures per endpoint.
pub latency: HashMap<String, LatencyStats>,
// ── Logs ─────────────────────────────────────────────────────
/// Recent log lines as `(timestamp, formatted line)`; never serialized.
#[serde(skip_serializing)]
pub logs: Vec<(i64, String)>,
}
/// Token usage of a single model, as reported in the latest push.
#[derive(Debug, Clone, Default, Serialize)]
pub struct ModelTokenStats {
/// Input tokens reported for the model.
pub input_tokens: i64,
/// Output tokens reported for the model.
pub output_tokens: i64,
/// Calls reported for the model.
pub calls: i64,
}
/// Latency figures of one endpoint, copied from the pushed snapshot.
#[derive(Debug, Clone, Default, Serialize)]
pub struct LatencyStats {
/// Median latency in milliseconds.
pub p50_ms: f64,
/// 90th-percentile latency in milliseconds.
pub p90_ms: f64,
/// 99th-percentile latency in milliseconds.
pub p99_ms: f64,
/// Maximum latency in milliseconds.
pub max_ms: f64,
/// Sample count of the snapshot.
pub count: u64,
}
/// The global stats store: a shared, `RwLock`-protected map from app name to
/// that app's [`AppStats`].
pub type StatsStore = Arc<RwLock<HashMap<String, AppStats>>>;
/// Merge a new push payload into the stats store.
pub async fn merge_push_payload(
store: &StatsStore,
app: &str,
_instance: &str,
timestamp: i64,
http: Option<&observability::push::HttpPayload>,
system: Option<&observability::push::SystemPayload>,
business: &HashMap<String, f64>,
token_usage: Option<&observability::push::TokenUsagePayload>,
tasks: Option<&observability::push::TaskStatsPayload>,
latency: &HashMap<String, observability::push::LatencySnapshot>,
logs: &[observability::push::LogEntry],
) {
// Use app_name as key (merge across instances for aggregation)
let mut guard = store.write().await;
let entry = guard.entry(app.to_string()).or_default();
entry.last_seen = timestamp;
entry.sample_count += 1;
// HTTP — accumulate (not replace, so we get totals over time)
if let Some(http) = http {
entry.requests_total = http.requests_total;
entry.request_duration_ms_total = http.request_duration_ms_total;
entry.requests_2xx = http.requests_2xx;
entry.requests_4xx = http.requests_4xx;
entry.requests_5xx = http.requests_5xx;
for (ep, count) in &http.endpoints {
*entry.endpoints.entry(ep.clone()).or_insert(0) = *count;
}
}
// System — replace (current snapshot, not cumulative)
if let Some(sys) = system {
entry.cpu_usage_percent = sys.cpu_usage_percent;
entry.memory_used_mb = sys.memory_used_mb;
entry.memory_total_mb = sys.memory_total_mb;
entry.uptime_secs = sys.uptime_secs;
}
// Business — replace with latest snapshot
entry.business = business.clone();
// Token usage — replace with latest
if let Some(tu) = token_usage {
entry.ai_input_tokens_total = tu.ai_input_tokens_total;
entry.ai_output_tokens_total = tu.ai_output_tokens_total;
entry.ai_calls_total = tu.ai_calls_total;
entry.ai_calls_success = tu.ai_calls_success;
entry.ai_calls_failure = tu.ai_calls_failure;
for (model, usage) in &tu.by_model {
let ms = entry.token_by_model.entry(model.clone()).or_default();
ms.input_tokens = usage.input_tokens;
ms.output_tokens = usage.output_tokens;
ms.calls = usage.calls;
}
}
// Tasks — replace with latest
if let Some(t) = tasks {
entry.tasks_queued = t.queued;
entry.tasks_running = t.running;
entry.tasks_completed = t.completed;
entry.tasks_failed = t.failed;
}
// Latency — replace with latest snapshots
for (endpoint, snap) in latency {
let ls = entry.latency.entry(endpoint.clone()).or_default();
ls.p50_ms = snap.p50_ms;
ls.p90_ms = snap.p90_ms;
ls.p99_ms = snap.p99_ms;
ls.max_ms = snap.max_ms;
ls.count = snap.count;
}
// Logs — append (keep last 300 lines)
for log in logs {
entry.logs.push((
log.timestamp,
format!("[{}] {}", log.level.to_lowercase(), log.message),
));
}
let cutoff = chrono::Utc::now().timestamp() - 300;
entry.logs.retain(|(ts, _)| *ts >= cutoff);
}
/// Dashboard response combining all apps' stats.
#[derive(Debug, Serialize)]
pub struct DashboardResponse {
/// Timestamp of this snapshot.
pub timestamp: i64,
/// Number of apps in the store (one entry per app name, not per instance).
pub app_count: u64,
/// Per-app aggregated stats.
pub apps: HashMap<String, AppStats>,
/// Derived: mean of the p99 latency over every endpoint entry of every app;
/// `0.0` when there is no latency entry.
pub avg_p99_ms: f64,
/// Derived: total input tokens consumed across all apps.
pub total_input_tokens: i64,
/// Derived: total output tokens consumed across all apps.
pub total_output_tokens: i64,
/// Derived: total AI calls across all apps.
pub total_ai_calls: i64,
}
/// Builds the dashboard response from the current content of the store.
///
/// Sums the token and call totals over all apps and averages the p99 latency
/// over every endpoint entry of every app (`0.0` when there is none).
pub async fn build_dashboard(store: &StatsStore) -> DashboardResponse {
    let guard = store.read().await;

    let mut p99_sum = 0.0;
    let mut p99_samples = 0;
    let mut total_input_tokens = 0i64;
    let mut total_output_tokens = 0i64;
    let mut total_ai_calls = 0i64;

    for stats in guard.values() {
        total_input_tokens += stats.ai_input_tokens_total;
        total_output_tokens += stats.ai_output_tokens_total;
        total_ai_calls += stats.ai_calls_total;
        for lat in stats.latency.values() {
            p99_sum += lat.p99_ms;
            p99_samples += 1;
        }
    }

    let avg_p99_ms = match p99_samples {
        0 => 0.0,
        n => p99_sum / n as f64,
    };

    DashboardResponse {
        timestamp: chrono::Utc::now().timestamp(),
        app_count: guard.len() as u64,
        apps: guard.clone(),
        avg_p99_ms,
        total_input_tokens,
        total_output_tokens,
        total_ai_calls,
    }
}

View File

@ -1,36 +0,0 @@
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// One endpoint to scrape, as read from the targets file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ScrapeTarget {
/// Name of the target.
pub name: String,
/// Address placed after `http://` when the scrape URL is built.
pub addr: String,
/// Path appended to `addr`, or a complete URL; defaults to `/metrics`.
#[serde(default = "default_metrics_path")]
pub metrics_path: String,
/// Extra labels of the target; empty when omitted.
#[serde(default)]
pub labels: HashMap<String, String>,
}
/// Serde default for `ScrapeTarget::metrics_path`.
fn default_metrics_path() -> String {
    String::from("/metrics")
}
impl ScrapeTarget {
    /// Returns the URL to scrape.
    ///
    /// A `metrics_path` that is already a complete `http://` or `https://`
    /// URL is returned unchanged. Otherwise it is treated as a path on
    /// `addr`; a missing leading slash is added so that `addr` and the path
    /// never run together.
    pub fn url(&self) -> String {
        let path = self.metrics_path.as_str();
        if path.starts_with("http://") || path.starts_with("https://") {
            return path.to_string();
        }
        if path.starts_with('/') {
            format!("http://{}{}", self.addr, path)
        } else {
            format!("http://{}/{}", self.addr, path)
        }
    }
}
/// Reads `path` and parses it as a JSON array of [`ScrapeTarget`]s.
///
/// # Errors
/// Fails when the file cannot be read or is not valid JSON of the expected
/// shape; both errors name the file.
pub async fn load_targets_from_file(path: &str) -> anyhow::Result<Vec<ScrapeTarget>> {
    let content = tokio::fs::read_to_string(path)
        .await
        .with_context(|| format!("read targets file {path}"))?;
    let targets: Vec<ScrapeTarget> =
        serde_json::from_str(&content).with_context(|| format!("parse targets file {path}"))?;
    Ok(targets)
}

View File

@ -1,13 +0,0 @@
[package]
name = "migrate-cli"
version.workspace = true
edition.workspace = true
[dependencies]
migrate.workspace = true
sea-orm = { workspace = true, features = ["sqlx-all", "runtime-tokio"] }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
anyhow.workspace = true
clap.workspace = true
dotenvy.workspace = true
config = { workspace = true }

View File

@ -1,102 +0,0 @@
use anyhow::Context;
use clap::Command;
use migrate::MigratorTrait;
use sea_orm::{Database, DatabaseConnection};
/// Entry point of the migration CLI.
///
/// Usage: `migrate [steps] <up|down|fresh|refresh|reset|status>`.
/// `steps` limits `up` (default: all pending) and `down` (default: 1).
///
/// The command line is validated before the database is contacted: a missing
/// subcommand prints the usage and exits with status 1, and a `steps` value
/// that is not a non-negative integer is an error instead of being silently
/// replaced by the default.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    dotenvy::dotenv().ok();
    config::AppConfig::load();
    let cmd = Command::new("migrate")
        .about("Database migration CLI")
        .arg(
            clap::Arg::new("steps")
                .help("Number of migrations (for up/down)")
                .required(false)
                .index(1),
        )
        .subcommand(Command::new("up").about("Apply pending migrations"))
        .subcommand(Command::new("down").about("Revert applied migrations"))
        .subcommand(Command::new("fresh").about("Drop all tables and re-apply"))
        .subcommand(Command::new("refresh").about("Revert all then re-apply"))
        .subcommand(Command::new("reset").about("Revert all applied migrations"))
        .subcommand(Command::new("status").about("Show migration status"))
        .try_get_matches()
        .map_err(|e| anyhow::anyhow!("{}", e))?;

    let print_usage_and_exit = || -> ! {
        eprintln!("Usage: migrate <command>\nCommands: up, down, fresh, refresh, reset, status");
        std::process::exit(1)
    };

    // Without a subcommand there is nothing to do; do not open a connection.
    let Some(command) = cmd.subcommand_name() else {
        print_usage_and_exit()
    };

    // Reject a malformed step count up front rather than falling back to a
    // default, which for `up` would mean "apply everything".
    let steps: Option<u32> = cmd
        .get_one::<String>("steps")
        .map(|raw| {
            raw.parse::<u32>().with_context(|| {
                format!("invalid steps value {raw:?}: expected a non-negative integer")
            })
        })
        .transpose()?;

    let db_url = config::AppConfig::load().database_url()?;
    let db: DatabaseConnection = Database::connect(&db_url).await?;

    match command {
        "up" => run_up(&db, steps.unwrap_or(0)).await?,
        "down" => run_down(&db, steps.unwrap_or(1)).await?,
        "fresh" => run_fresh(&db).await?,
        "refresh" => run_refresh(&db).await?,
        "reset" => run_reset(&db).await?,
        "status" => run_status(&db).await?,
        _ => print_usage_and_exit(),
    }
    Ok(())
}
/// Applies pending migrations; `steps == 0` means "all of them".
async fn run_up(db: &DatabaseConnection, steps: u32) -> anyhow::Result<()> {
    let limit = (steps != 0).then_some(steps);
    migrate::Migrator::up(db, limit)
        .await
        .context("failed to run migrations up")
}
/// Reverts the last `steps` applied migrations.
async fn run_down(db: &DatabaseConnection, steps: u32) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::down(db, Some(steps)).await;
    outcome.context("failed to run migrations down")
}
/// Runs the migrator's `fresh` operation.
async fn run_fresh(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::fresh(db).await;
    outcome.context("failed to run migrations fresh")
}
/// Runs the migrator's `refresh` operation.
async fn run_refresh(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::refresh(db).await;
    outcome.context("failed to run migrations refresh")
}
/// Runs the migrator's `reset` operation.
async fn run_reset(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::reset(db).await;
    outcome.context("failed to run migrations reset")
}
/// Runs the migrator's `status` operation.
async fn run_status(db: &DatabaseConnection) -> anyhow::Result<()> {
    let outcome = migrate::Migrator::status(db).await;
    outcome.context("failed to get migration status")
}

View File

@ -1,21 +0,0 @@
[package]
name = "static-server"
version.workspace = true
edition.workspace = true
[dependencies]
actix-web = { workspace = true }
actix-files = { workspace = true }
actix-cors = { workspace = true }
observability = { workspace = true }
metrics-exporter-prometheus = "0.13"
tokio = { workspace = true, features = ["full"] }
futures = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
mime = { workspace = true }
mime_guess2 = { workspace = true }
slog = { workspace = true }
anyhow = { workspace = true }
env_logger = { workspace = true }
log = "0.4"

View File

@ -1,212 +0,0 @@
use actix_cors::Cors;
use actix_files::Files;
use actix_web::dev::{Service, ServiceRequest, ServiceResponse};
use actix_web::{App, HttpResponse, HttpServer, http::header, web};
use futures::future::LocalBoxFuture;
use log::info;
use observability::{HttpMetrics, init_tracing_subscriber, install_recorder, push::MetricsPusher};
use std::path::PathBuf;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Instant;
/// Static file server for avatar, blob, and other static files
/// Serves files from /data/{type} directories
#[derive(Clone)]
struct StaticConfig {
    /// Directory under which the served sub-directories live.
    root: PathBuf,
    /// Value of the `STATIC_CORS` switch.
    cors_enabled: bool,
}

impl StaticConfig {
    /// Reads the configuration from the environment.
    ///
    /// * `STATIC_ROOT` — root directory, default `/data`.
    /// * `STATIC_CORS` — `true` (any letter case) or `1` enables the switch,
    ///   any other value disables it; unset means enabled. Surrounding
    ///   whitespace is ignored.
    fn from_env() -> Self {
        let root = std::env::var("STATIC_ROOT").unwrap_or_else(|_| "/data".to_string());
        let cors_enabled = std::env::var("STATIC_CORS")
            .map(|raw| {
                let flag = raw.trim();
                flag.eq_ignore_ascii_case("true") || flag == "1"
            })
            .unwrap_or(true);
        Self {
            root: PathBuf::from(root),
            cors_enabled,
        }
    }

    /// Returns `root/name`, creating the directory (and missing parents).
    ///
    /// Creation is best effort: a failure is reported on stderr and the path
    /// is returned regardless, so start-up continues.
    fn ensure_dir(&self, name: &str) -> PathBuf {
        let dir = self.root.join(name);
        // `create_dir_all` succeeds when the directory already exists, so no
        // separate existence check is needed.
        if let Err(e) = std::fs::create_dir_all(&dir) {
            eprintln!("warning: could not create directory {:?}: {}", dir, e);
        }
        dir
    }
}
/// Liveness endpoint: always answers `200` with a fixed JSON document.
async fn health() -> HttpResponse {
    let payload = serde_json::json!({
        "status": "ok",
        "service": "static-server"
    });
    HttpResponse::Ok().json(payload)
}
/// Middleware factory for request logging. Requests to noisy paths (health,
/// metrics, websocket and the static file prefixes) are not logged.
struct RequestLogger;

impl<S, B> actix_web::dev::Transform<S, ServiceRequest> for RequestLogger
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Transform = RequestLoggerService<S>;
    type InitError = ();
    type Future = futures::future::Ready<Result<Self::Transform, Self::InitError>>;

    /// Wraps `service` in a [`RequestLoggerService`]; never fails.
    fn new_transform(&self, service: S) -> Self::Future {
        let wrapped = RequestLoggerService {
            service,
            _marker: std::marker::PhantomData,
        };
        futures::future::ready(Ok(wrapped))
    }
}
/// Service produced by `RequestLogger`: forwards every request to the wrapped
/// service and logs the ones that are not on a quiet path.
struct RequestLoggerService<S> {
// The wrapped (inner) service.
service: S,
// Zero-sized marker; carries no data.
_marker: std::marker::PhantomData<fn(ServiceRequest)>,
}
impl<S, B> Service<ServiceRequest> for RequestLoggerService<S>
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;

    /// Readiness is that of the wrapped service.
    fn poll_ready(&self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.service.poll_ready(cx)
    }

    /// Forwards the request and, unless the path is a quiet one, logs method,
    /// path, status code and elapsed time once the response is available.
    /// Errors from the wrapped service are passed through without logging.
    fn call(&self, req: ServiceRequest) -> Self::Future {
        // Paths that are matched exactly / by prefix and never logged.
        const QUIET_EXACT: [&str; 2] = ["/health", "/metrics"];
        const QUIET_PREFIXES: [&str; 5] = ["/ws", "/avatar", "/blob", "/media", "/static"];

        let path = req.path().to_string();
        let method = req.method().to_string();
        let is_quiet = QUIET_EXACT.contains(&path.as_str())
            || QUIET_PREFIXES.iter().any(|prefix| path.starts_with(*prefix));
        let should_log = !is_quiet;

        let start = Instant::now();
        let fut = self.service.call(req);
        Box::pin(async move {
            let res = fut.await?;
            if should_log {
                info!(
                    target: "static_server",
                    "{} {} {} {:?}",
                    method,
                    path,
                    res.status().as_u16(),
                    start.elapsed()
                );
            }
            Ok(res)
        })
    }
}
/// Entry point of the static file server.
///
/// Sets up tracing and metrics, optionally starts the metrics pusher
/// (`METRICS_PUSH_URL`), makes sure the served directories exist and then
/// serves `/avatar`, `/blob`, `/media` and `/static` from the configured root
/// on `STATIC_BIND` (default `0.0.0.0:8081`), plus `/health`.
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
    init_tracing_subscriber("info", false);
    let prometheus_handle = Arc::new(install_recorder());
    let http_metrics = Arc::new(HttpMetrics::new());

    // Metrics pusher: periodically push all metrics to apps/metrics aggregator
    if let Ok(push_url) = std::env::var("METRICS_PUSH_URL") {
        let pusher = MetricsPusher::new(&push_url, "static");
        pusher.spawn(
            http_metrics.clone(),
            prometheus_handle.clone(),
            std::time::Duration::from_secs(15),
        );
        info!("Metrics pusher started (interval 15s, url: {})", push_url);
    }

    let cfg = StaticConfig::from_env();
    let bind = std::env::var("STATIC_BIND").unwrap_or_else(|_| "0.0.0.0:8081".to_string());
    let cors_state = if cfg.cors_enabled {
        "enabled"
    } else {
        "disabled"
    };
    println!("Static file server starting...");
    println!(" Root: {:?}", cfg.root);
    println!(" Bind: {}", bind);
    println!(" CORS: {}", cors_state);

    // Ensure all directories exist
    for name in ["avatar", "blob", "media", "static"] {
        let dir = cfg.ensure_dir(name);
        println!(" {} dir: {:?}", name, dir);
    }

    let root = cfg.root.clone();
    let cors_enabled = cfg.cors_enabled;
    HttpServer::new(move || {
        let root = root.clone();

        // Mounts `root/<dir>` at `/<dir>` with the shared file options.
        let mount = |dir: &str| {
            Files::new(&format!("/{}", dir), root.join(dir))
                .prefer_utf8(true)
                .index_file("index.html")
        };

        let cors = match cors_enabled {
            // WARNING: allow_any_origin is intentional for static asset serving (CDN mode)
            // Ensure no sensitive files are served from this directory
            true => Cors::default()
                .allow_any_origin()
                .allowed_methods(vec!["GET", "HEAD", "OPTIONS"])
                .allowed_headers(vec![
                    header::AUTHORIZATION,
                    header::ACCEPT,
                    header::CONTENT_TYPE,
                ])
                .max_age(3600),
            // NOTE(review): with the switch off the fully permissive policy is
            // installed, which is broader than the "enabled" one — confirm
            // that this is intended.
            false => Cors::permissive(),
        };

        App::new()
            .wrap(cors)
            .wrap(RequestLogger)
            .route("/health", web::get().to(health))
            .service(mount("avatar"))
            .service(mount("blob"))
            .service(mount("media"))
            .service(mount("static"))
    })
    .bind(&bind)?
    .run()
    .await?;
    Ok(())
}

1807
bun.lock

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"$schema": "https://ui.shadcn.com/schema.json", "$schema": "https://ui.shadcn.com/schema.json",
"style": "radix-nova", "style": "base-nova",
"rsc": false, "rsc": false,
"tsx": true, "tsx": true,
"tailwind": { "tailwind": {
@ -12,6 +12,8 @@
}, },
"iconLibrary": "lucide", "iconLibrary": "lucide",
"rtl": false, "rtl": false,
"menuColor": "default",
"menuAccent": "subtle",
"aliases": { "aliases": {
"components": "@/components", "components": "@/components",
"utils": "@/lib/utils", "utils": "@/lib/utils",
@ -19,9 +21,7 @@
"lib": "@/lib", "lib": "@/lib",
"hooks": "@/hooks" "hooks": "@/hooks"
}, },
"menuColor": "default",
"menuAccent": "subtle",
"registries": { "registries": {
"@ai-elements": "https://ai-sdk.dev/elements/api/registry/{name}.json" "@manifest": "https://ui.manifest.build/r/{name}.json"
} }
} }

View File

@ -1,25 +0,0 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# Secrets
.server.yaml

View File

@ -1,6 +0,0 @@
apiVersion: v2
name: deploy
description: Helm chart for the project backend services
type: application
version: 0.1.0
appVersion: "0.2.9"

View File

@ -1,209 +0,0 @@
# Deploy Helm Chart
Monolithic Helm chart for all backend services.
## Services
| Service | Port(s) | Replicas | HPA | Purpose |
|----------------------|-------------------------|----------|----------|---------------------------------------------|
| `app` | 3000 (HTTP) | 2 | 2–10 | Main API server |
| `gitserver` | 8021 (HTTP), 2222 (SSH) | 1 | 1–5 | Git HTTP + SSH server |
| `email_worker` | 8084 (HTTP) | 1 | disabled | Email queue consumer (single instance only) |
| `git_hook` | 8083 (HTTP) | 1 | 1–5 | Git hook worker pool |
| `metrics_aggregator` | 9090 (HTTP) | 1 | 1–5 | Prometheus scrape + Loki push |
| `static_server` | 8081 (HTTP) | 1 | 1–5 | Static file server (avatars, blobs, media) |
## Prerequisites
The following resources must exist in the cluster **before** installing the Helm chart. They are not managed by Helm —
install, upgrade, and uninstall of the chart will not touch them.
### 1. Namespace
```bash
kubectl create namespace app
```
### 2. PVC (aliyun-nfs-app, 200Ti, ReadWriteMany)
```bash
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: shared-data
namespace: app
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 200Ti
storageClassName: aliyun-nfs-app
EOF
```
> The chart references this PVC by hardcoded name `shared-data`. This name is immutable — it cannot be changed via Helm
> values.
### 3. ConfigMap
```bash
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
name: app-env
namespace: app
data:
APP_REPOS_ROOT: "/data/repos"
APP_AVATAR_PATH: "/data/avatars"
STORAGE_PATH: "/data/files"
STATIC_ROOT: "/data"
APP_LOG_LEVEL: "info"
APP_COOKIE_SECURE: "false"
APP_DOMAIN_URL: "https://your-domain.com"
APP_DATABASE_URL: "postgres://user:pass@postgres:5432/app"
APP_REDIS_URL: "redis://redis:6379"
APP_AI_BASIC_URL: "https://api.openai.com/v1"
APP_AI_API_KEY: "sk-..."
APP_SMTP_PASSWORD: "..."
APP_SESSION_SECRET: "min-32-byte-random-string..."
APP_SSH_SERVER_PRIVATE_KEY: "<hex-encoded-private-key>"
EOF
```
| Variable | Default / Example | Required |
|------------------------------|-----------------------------|-----------|
| `APP_REPOS_ROOT` | `/data/repos` | Yes |
| `APP_AVATAR_PATH` | `/data/avatars` | Yes |
| `STORAGE_PATH` | `/data/files` | Yes |
| `STATIC_ROOT` | `/data` | Yes |
| `APP_LOG_LEVEL` | `info` | No |
| `APP_COOKIE_SECURE` | `false` | No |
| `APP_DOMAIN_URL` | `https://your-domain.com` | Yes |
| `APP_DATABASE_URL` | `postgres://...` | **Yes** |
| `APP_REDIS_URL` | `redis://...` | **Yes** |
| `APP_AI_BASIC_URL` | `https://api.openai.com/v1` | **Yes** |
| `APP_AI_API_KEY` | `sk-...` | **Yes** |
| `APP_SMTP_PASSWORD` | `...` | **Yes** |
| `APP_SESSION_SECRET` | min 32 bytes | **Yes** |
| `APP_SSH_SERVER_PRIVATE_KEY` | hex-encoded PEM | **Yes** |
| `APP_SSH_PORT` | `2222` | Yes (k8s) |
> **SSH host key**: `APP_SSH_SERVER_PRIVATE_KEY` must be the hex-encoded Ed25519 private key PEM bytes.
> ```bash
> ssh-keygen -t ed25519 -f /tmp/ssh_host_key -N ""
> hexdump -v -e '/1 "%02x"' < /tmp/ssh_host_key
> ```
>
> **Session secret**: generate 48 random bytes:
> ```bash
> openssl rand -base64 48
> ```
>
> Override the ConfigMap name with `--set configMapName=your-cm-name`.
### 4. Verify prerequisites
```bash
kubectl get namespace app
kubectl get pvc -n app shared-data
kubectl get configmap -n app app-env
```
## Quick Start
```bash
helm template deploy ./deploy --namespace app --set imageRegistry=ghcr.io/your-org
helm lint ./deploy
# Install
helm upgrade --install deploy ./deploy \
--namespace app \
--set imageRegistry=ghcr.io/your-org \
--set imageTag=v0.2.9
```
## Storage
All services share a single PVC (`shared-data`) via `subPath` mounts:
| SubPath | Mount | Used By |
|-----------|-----------------|--------------------------|
| `repos` | `/data/repos` | app, gitserver, git-hook |
| `avatars` | `/data/avatars` | app |
| `files` | `/data/files` | app |
| `static` | `/data` | static-server |
Pods run as UID/GID `1000` and set `fsGroup: 1000` so Git processes can create temporary object
directories under bare repositories. If an existing PVC was previously written by another UID,
fix ownership once from a maintenance pod:
```bash
chown -R 1000:1000 /data/repos
chmod -R u+rwX,g+rwX /data/repos
```
## Autoscaling
All services except `email_worker` have HPA enabled by default. The email worker is fixed at 1 replica and must not be
scaled.
To adjust HPA bounds per service:
```bash
--set services.app.autoscaling.maxReplicas=20
--set services.app.autoscaling.targetCPUUtilization=70
```
To disable HPA for a service:
```bash
--set services.git_hook.autoscaling.enabled=false
```
## Ingress
```bash
helm upgrade --install deploy ./deploy \
--namespace app \
--set ingress.enabled=true \
--set ingress.className=nginx \
--set ingress.hosts[0].host=your-domain.com
```
## Dependencies
All services require these to be reachable from the cluster:
- PostgreSQL (via `APP_DATABASE_URL`)
- Redis (via `APP_REDIS_URL`)
- Git binary (installed in the `app`, `gitserver`, and `git-hook` Docker images)
- OpenAI-compatible API (via `APP_AI_BASIC_URL` + `APP_AI_API_KEY`)
- Qdrant vector DB (via `APP_QDRANT_URL`)
- SMTP server (via `APP_SMTP_*`)
- Embedding model (via `APP_EMBED_MODEL_*`)
Optional dependencies with graceful degradation:
| Dependency | Variable | Fallback |
|----------------|-------------------------------|------------------|
| NATS JetStream | `NATS_URL` + `NATS_TOKEN` | Redis queue |
| Loki | `LOKI_URL` | Logs discarded |
| OTEL Collector | `OTEL_EXPORTER_OTLP_ENDPOINT` | Tracing disabled |
## Production Example
```bash
helm upgrade --install deploy ./deploy \
--namespace app \
--set imageRegistry=ghcr.io/your-org \
--set imageTag=v0.2.9 \
--set services.app.replicaCount=3 \
--set services.app.autoscaling.maxReplicas=20 \
--set ingress.enabled=true \
--set ingress.className=nginx \
--set ingress.hosts[0].host=your-domain.com \
--set configMapName=app-env
```

View File

@ -1,19 +0,0 @@
Project backend services deployed to namespace: {{ .Release.Namespace }}
Services:
{{- range $svcKey, $svcVal := .Values.services }}
{{ $svcKey | replace "_" "-" }}: {{ if $svcVal.ports }}{{ range $portName, $portNum := $svcVal.ports }}{{ $portName }}={{ $portNum }} {{ end }}{{ else }}port={{ $svcVal.port }}{{ end }} {{ if $svcVal.autoscaling.enabled }}(HPA: {{ $svcVal.autoscaling.minReplicas }}-{{ $svcVal.autoscaling.maxReplicas }}){{ else }}(static: {{ $svcVal.replicaCount }}){{ end }}
{{- end }}
To access the app locally:
kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "deploy.serviceFullname" (dict "root" . "svcKey" "app") }} 3000:3000
To check HPA status:
{{- range $svcKey, $svcVal := .Values.services }}
{{- if $svcVal.autoscaling.enabled }}
kubectl get hpa -n {{ $.Release.Namespace }} {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" $svcKey) }}
{{- end }}
{{- end }}
To check all pods:
kubectl get pods -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "deploy.name" . }}"

View File

@ -1,78 +0,0 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "deploy.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
*/}}
{{- define "deploy.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Service fullname — the chart fullname followed by the service key, used to
name per-service resources.

Expects a dict argument with two keys:
  root   — the chart root context (passed on to "deploy.fullname")
  svcKey — the service key, e.g. "git_hook"

Underscores in svcKey are replaced with hyphens for valid Kubernetes names.
The result is truncated to 63 characters and a trailing "-" is trimmed.
*/}}
{{- define "deploy.serviceFullname" -}}
{{- printf "%s-%s" (include "deploy.fullname" .root) (.svcKey | replace "_" "-") | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Chart name and version as used by the chart label.
*/}}
{{- define "deploy.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "deploy.labels" -}}
helm.sh/chart: {{ include "deploy.chart" . }}
{{ include "deploy.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "deploy.selectorLabels" -}}
app.kubernetes.io/name: {{ include "deploy.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Per-service selector labels — used by Service to target the right Deployment.

Expects a dict argument with two keys:
  root   — the chart root context (used for the chart name and release name)
  svcKey — the service key, e.g. "git_hook"

Emits the app.kubernetes.io/name, /instance and /component labels.
Underscores in svcKey are replaced with hyphens for valid Kubernetes label values.
*/}}
{{- define "deploy.serviceSelectorLabels" -}}
app.kubernetes.io/name: {{ include "deploy.name" .root }}
app.kubernetes.io/instance: {{ .root.Release.Name }}
app.kubernetes.io/component: {{ .svcKey | replace "_" "-" }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "deploy.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "deploy.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -1,89 +0,0 @@
{{- /*
Deployment for the `app` service (main API server).

All settings come from .Values.services.app plus the chart-wide values
(imageRegistry, imageTag, configMapName, security contexts, scheduling).
Environment is loaded from the externally managed ConfigMap; optional
per-service overrides come from services.app.extraEnv.

spec.replicas is rendered only when autoscaling is disabled. When the HPA is
enabled it owns the replica count, and rendering the field would make every
`helm upgrade` reset the value chosen by the autoscaler.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "app") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: app
spec:
  {{- if not .Values.services.app.autoscaling.enabled }}
  replicas: {{ .Values.services.app.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "app") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: app
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: app
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart appVersion when imageTag is empty.
          image: "{{ .Values.imageRegistry }}/{{ .Values.services.app.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          {{- with .Values.services.app.command }}
          command:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.services.app.port }}
              protocol: TCP
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          {{- with .Values.services.app.extraEnv }}
          env:
            {{- range $key, $val := . }}
            - name: {{ $key }}
              value: {{ $val | quote }}
            {{- end }}
          {{- end }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.app.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.app.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      volumes:
        # The PVC is created outside the chart and referenced by its fixed name.
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "app") }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: app
spec:
type: ClusterIP
ports:
- port: {{ .Values.services.app.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "app") | nindent 4 }}

View File

@ -1,70 +0,0 @@
{{- /*
Deployment for the `email_worker` service (email queue consumer).

All settings come from .Values.services.email_worker plus the chart-wide
values. Environment is loaded from the externally managed ConfigMap.

spec.replicas is rendered only when autoscaling is disabled, matching the
other Deployments in this chart: when an HPA is enabled it owns the replica
count and `helm upgrade` must not reset it.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "email_worker") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: email-worker
spec:
  {{- if not .Values.services.email_worker.autoscaling.enabled }}
  replicas: {{ .Values.services.email_worker.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "email_worker") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: email-worker
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: email-worker
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart appVersion when imageTag is empty.
          image: "{{ .Values.imageRegistry }}/{{ .Values.services.email_worker.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.email_worker.port }}
              protocol: TCP
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.email_worker.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "email_worker") }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: email-worker
spec:
type: ClusterIP
ports:
- port: {{ .Values.services.email_worker.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "email_worker") | nindent 4 }}

View File

@ -1,78 +0,0 @@
{{- /*
Deployment for the `git_hook` service (Git hook worker pool).

All settings come from .Values.services.git_hook plus the chart-wide values.
Environment is loaded from the externally managed ConfigMap.

spec.replicas is rendered only when autoscaling is disabled. When the HPA is
enabled it owns the replica count, and rendering the field would make every
`helm upgrade` reset the value chosen by the autoscaler.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "git_hook") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: git-hook
spec:
  {{- if not .Values.services.git_hook.autoscaling.enabled }}
  replicas: {{ .Values.services.git_hook.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "git_hook") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: git-hook
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: git-hook
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart appVersion when imageTag is empty.
          image: "{{ .Values.imageRegistry }}/{{ .Values.services.git_hook.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.git_hook.port }}
              protocol: TCP
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.git_hook.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.git_hook.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      volumes:
        # The PVC is created outside the chart and referenced by its fixed name.
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "git_hook") }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: git-hook
spec:
type: ClusterIP
ports:
- port: {{ .Values.services.git_hook.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "git_hook") | nindent 4 }}

View File

@ -1,88 +0,0 @@
{{- /*
Deployment for the `gitserver` service (Git HTTP + SSH server).

All settings come from .Values.services.gitserver plus the chart-wide values.
The container exposes two ports, taken from services.gitserver.ports.http and
services.gitserver.ports.ssh. Environment is loaded from the externally
managed ConfigMap; optional overrides come from services.gitserver.extraEnv.

spec.replicas is rendered only when autoscaling is disabled. When the HPA is
enabled it owns the replica count, and rendering the field would make every
`helm upgrade` reset the value chosen by the autoscaler.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "gitserver") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: gitserver
spec:
  {{- if not .Values.services.gitserver.autoscaling.enabled }}
  replicas: {{ .Values.services.gitserver.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "gitserver") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: gitserver
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: gitserver
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart appVersion when imageTag is empty.
          image: "{{ .Values.imageRegistry }}/{{ .Values.services.gitserver.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.gitserver.ports.http }}
              protocol: TCP
            - name: ssh
              containerPort: {{ .Values.services.gitserver.ports.ssh }}
              protocol: TCP
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          {{- with .Values.services.gitserver.extraEnv }}
          env:
            {{- range $key, $val := . }}
            - name: {{ $key }}
              value: {{ $val | quote }}
            {{- end }}
          {{- end }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.gitserver.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.gitserver.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      volumes:
        # The PVC is created outside the chart and referenced by its fixed name.
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -1,20 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "gitserver") }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: gitserver
spec:
type: ClusterIP
ports:
- port: {{ .Values.services.gitserver.ports.http }}
targetPort: http
protocol: TCP
name: http
- port: {{ .Values.services.gitserver.ports.ssh }}
targetPort: ssh
protocol: TCP
name: ssh
selector:
{{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "gitserver") | nindent 4 }}

View File

@ -1,21 +0,0 @@
{{- /*
LoadBalancer Service exposing the gitserver SSH port externally on port 22.

Traffic is forwarded to the container port named `ssh` of the gitserver pods.
Annotations come from .Values.services.gitserver.sshService.annotations; the
`annotations:` key is emitted only when that map is non-empty, so the default
`{}` no longer renders a key with a null value.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "gitserver") }}-ssh
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: gitserver
  {{- with .Values.services.gitserver.sshService.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  type: LoadBalancer
  externalTrafficPolicy: Local
  ports:
    - port: 22
      targetPort: ssh
      protocol: TCP
      name: ssh
  selector:
    {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "gitserver") | nindent 4 }}

View File

@ -1,26 +0,0 @@
{{- range $svcKey, $svcVal := .Values.services }}
{{- if $svcVal.autoscaling.enabled }}
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" $svcKey) }}
labels:
{{- include "deploy.labels" $ | nindent 4 }}
app.kubernetes.io/component: {{ $svcKey | replace "_" "-" }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" $svcKey) }}
minReplicas: {{ $svcVal.autoscaling.minReplicas }}
maxReplicas: {{ $svcVal.autoscaling.maxReplicas }}
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ $svcVal.autoscaling.targetCPUUtilization }}
{{- end }}
{{- end }}

View File

@ -1,41 +0,0 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "deploy.fullname" . }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- with .Values.ingress.className }}
ingressClassName: {{ . }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
pathType: {{ .pathType }}
backend:
service:
name: {{ include "deploy.serviceFullname" (dict "root" $ "svcKey" .serviceName) }}
port:
number: {{ .servicePort }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -1,70 +0,0 @@
{{- /*
Deployment for the `metrics_aggregator` service.

All settings come from .Values.services.metrics_aggregator plus the
chart-wide values. Environment is loaded from the externally managed
ConfigMap.

spec.replicas is rendered only when autoscaling is disabled. When the HPA is
enabled it owns the replica count, and rendering the field would make every
`helm upgrade` reset the value chosen by the autoscaler.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "metrics_aggregator") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: metrics-aggregator
spec:
  {{- if not .Values.services.metrics_aggregator.autoscaling.enabled }}
  replicas: {{ .Values.services.metrics_aggregator.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "metrics_aggregator") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: metrics-aggregator
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: metrics-aggregator
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart appVersion when imageTag is empty.
          image: "{{ .Values.imageRegistry }}/{{ .Values.services.metrics_aggregator.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.metrics_aggregator.port }}
              protocol: TCP
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.metrics_aggregator.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "metrics_aggregator") }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: metrics-aggregator
spec:
type: ClusterIP
ports:
- port: {{ .Values.services.metrics_aggregator.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "metrics_aggregator") | nindent 4 }}

View File

@ -1 +0,0 @@
{{/* Secret disabled — all config via ConfigMap */}}

View File

@ -1,13 +0,0 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "deploy.serviceAccountName" . }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}

View File

@ -1,78 +0,0 @@
{{- /*
Deployment for the `static_server` service (static file server).

All settings come from .Values.services.static_server plus the chart-wide
values. Environment is loaded from the externally managed ConfigMap.

spec.replicas is rendered only when autoscaling is disabled. When the HPA is
enabled it owns the replica count, and rendering the field would make every
`helm upgrade` reset the value chosen by the autoscaler.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "static_server") }}
  labels:
    {{- include "deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: static-server
spec:
  {{- if not .Values.services.static_server.autoscaling.enabled }}
  replicas: {{ .Values.services.static_server.replicaCount | default 1 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "static_server") | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "deploy.labels" . | nindent 8 }}
        app.kubernetes.io/component: static-server
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "deploy.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: static-server
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          # The image tag falls back to the chart appVersion when imageTag is empty.
          image: "{{ .Values.imageRegistry }}/{{ .Values.services.static_server.repository }}:{{ .Values.imageTag | default .Chart.AppVersion }}"
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: {{ .Values.services.static_server.port }}
              protocol: TCP
          envFrom:
            - configMapRef:
                name: {{ .Values.configMapName }}
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          {{- with .Values.services.static_server.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.services.static_server.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      volumes:
        # The PVC is created outside the chart and referenced by its fixed name.
        - name: shared-data
          persistentVolumeClaim:
            claimName: shared-data
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deploy.serviceFullname" (dict "root" . "svcKey" "static_server") }}
labels:
{{- include "deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: static-server
spec:
type: ClusterIP
ports:
- port: {{ .Values.services.static_server.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "deploy.serviceSelectorLabels" (dict "root" . "svcKey" "static_server") | nindent 4 }}

View File

@ -1,212 +0,0 @@
# Global image registry and tag
imageRegistry: ""
imageTag: ""
# External ConfigMap (managed outside Helm)
configMapName: "app-env"
# Service definitions
services:
app:
repository: app
port: 3000
replicaCount: 2
autoscaling:
enabled: true
minReplicas: 2
maxReplicas: 10
targetCPUUtilization: 80
command:
- "app"
- "--bind"
- "0.0.0.0:3000"
resources:
requests:
cpu: 200m
memory: 256Mi
limits:
cpu: "1"
memory: 512Mi
volumeMounts:
- name: shared-data
mountPath: /data/repos
subPath: repos
- name: shared-data
mountPath: /data/avatars
subPath: avatars
- name: shared-data
mountPath: /data/files
subPath: files
email_worker:
repository: email-worker
port: 8084
replicaCount: 1
autoscaling:
enabled: false # email must stay at 1 replica
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
git_hook:
repository: git-hook
port: 8083
replicaCount: 1
autoscaling:
enabled: true
minReplicas: 1
maxReplicas: 5
targetCPUUtilization: 80
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
volumeMounts:
- name: shared-data
mountPath: /data/repos
subPath: repos
gitserver:
repository: gitserver
ports:
http: 8021
ssh: 2222
replicaCount: 1
autoscaling:
enabled: true
minReplicas: 1
maxReplicas: 5
targetCPUUtilization: 80
# SSH port must match the containerPort
extraEnv:
APP_SSH_PORT: "2222"
# SSH service config (MetalLB + Cilium)
# Shared IP: nginx ingress (80/443) + SSH (22) on same VIP
# Requires ingress-nginx svc also annotated with allow-shared-ip: "gitdata-shared"
sshService:
annotations: {}
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
volumeMounts:
- name: shared-data
mountPath: /data/repos
subPath: repos
metrics_aggregator:
repository: metrics-aggregator
port: 9090
replicaCount: 1
autoscaling:
enabled: true
minReplicas: 1
maxReplicas: 5
targetCPUUtilization: 80
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
static_server:
repository: static-server
port: 8081
replicaCount: 1
autoscaling:
enabled: true
minReplicas: 1
maxReplicas: 5
targetCPUUtilization: 80
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
cpu: 200m
memory: 128Mi
volumeMounts:
- name: shared-data
mountPath: /data
subPath: static
# Ingress
ingress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "cloudflare-acme-cluster-issuer"
nginx.ingress.kubernetes.io/proxy-body-size: "0"
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
nginx.ingress.kubernetes.io/affinity: "cookie"
nginx.ingress.kubernetes.io/session-cookie-name: "INGRESSROUTE"
nginx.ingress.kubernetes.io/session-cookie-path: "/"
nginx.ingress.kubernetes.io/session-cookie-max-age: "86400"
nginx.ingress.kubernetes.io/enable-real-ip: "true"
nginx.ingress.kubernetes.io/real-ip-header: "X-Forwarded-For"
nginx.ingress.kubernetes.io/use-forwarded-headers: "true"
hosts:
- host: gitdata.ai
paths:
- path: /
pathType: Prefix
serviceName: app
servicePort: 3000
- host: static.gitdata.ai
paths:
- path: /
pathType: Prefix
serviceName: static_server
servicePort: 8081
- host: git.gitdata.ai
paths:
- path: /
pathType: Prefix
serviceName: gitserver
servicePort: 8021
tls:
- secretName: gitdata-ai-tls
hosts:
- gitdata.ai
- static.gitdata.ai
- git.gitdata.ai
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
create: true
automount: true
annotations: {}
name: ""
podSecurityContext:
runAsNonRoot: true
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
fsGroupChangePolicy: OnRootMismatch
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
nodeSelector: {}
tolerations: []
affinity: {}

View File

@ -1,9 +0,0 @@
FROM ubuntu:24.04
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libssl3 openssh-client procps git \
&& rm -rf /var/lib/apt/lists/*
RUN git config --system --add safe.directory '*'
WORKDIR /app
COPY ./target/release/app /bin
EXPOSE 3000
CMD ["app"]

View File

@ -1,8 +1,61 @@
FROM ubuntu:24.04 # GitDataAI Backend - Email Service
RUN apt-get update && apt-get install -y --no-install-recommends \ # Multi-stage build for Rust application
ca-certificates libssl3 \
# Stage 1: Build the application
FROM rust:1.96-bookworm AS builder
# Install system dependencies
RUN apt-get update && apt-get install -y \
pkg-config \
libssl-dev \
libpq-dev \
cmake \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Create app directory
WORKDIR /app WORKDIR /app
COPY ./target/release/email-worker /bin
EXPOSE 8084 # Copy workspace files
CMD ["email-worker"] COPY Cargo.toml Cargo.lock ./
COPY app/ app/
COPY lib/ lib/
# Build the application in release mode
RUN cargo build --release --bin email-service
# Stage 2: Create runtime image
FROM debian:bookworm-slim
# Install runtime dependencies.
# procps provides `pgrep`, which the HEALTHCHECK in this stage calls; it is not
# part of the debian:bookworm-slim base image, so without it the health check
# would always fail. --no-install-recommends keeps the runtime image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libssl3 \
    libpq5 \
    ca-certificates \
    curl \
    procps \
    && rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN useradd -r -s /bin/false appuser
# Create directories
RUN mkdir -p /app/logs \
&& chown -R appuser:appuser /app
# Copy binary from builder
COPY --from=builder /app/target/release/email-service /app/email-service
# Set ownership
RUN chown -R appuser:appuser /app
# Switch to non-root user
USER appuser
# Set working directory
WORKDIR /app
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD pgrep email-service || exit 1
# Run the application
CMD ["./email-service"]

View File

@ -1,9 +0,0 @@
# Runtime image for the prebuilt `git-hook` binary; nothing is compiled inside this image.
FROM ubuntu:24.04
# Install runtime packages only and drop the apt lists in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libssl3 git \
&& rm -rf /var/lib/apt/lists/*
# Registers every path as a git safe.directory at system level.
# NOTE(review): this disables git's repository-ownership check for all paths — confirm this is intended.
RUN git config --system --add safe.directory '*'
WORKDIR /app
# Expects ./target/release/git-hook to already exist in the build context.
COPY ./target/release/git-hook /bin
EXPOSE 8083
# Started by name; presumably /bin is on PATH in the base image — verify.
CMD ["git-hook"]

View File

@ -1,9 +0,0 @@
# Runtime image for the prebuilt `gitserver` binary; nothing is compiled inside this image.
FROM ubuntu:24.04
# Install runtime packages only and drop the apt lists in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libssl3 git openssh-client \
&& rm -rf /var/lib/apt/lists/*
# Registers every path as a git safe.directory at system level.
# NOTE(review): this disables git's repository-ownership check for all paths — confirm this is intended.
RUN git config --system --add safe.directory '*'
WORKDIR /app
# Expects ./target/release/gitserver to already exist in the build context.
COPY ./target/release/gitserver /bin
# Two ports are declared. NOTE(review): presumably 8021 is HTTP and 2222 is SSH — confirm against the server config.
EXPOSE 8021 2222
# Started by name; presumably /bin is on PATH in the base image — verify.
CMD ["gitserver"]

View File

@ -1,8 +0,0 @@
# Runtime image for the prebuilt `metrics-aggregator` binary; nothing is compiled inside this image.
FROM ubuntu:24.04
# Install runtime packages only and drop the apt lists in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libssl3 \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Expects ./target/release/metrics-aggregator to already exist in the build context.
COPY ./target/release/metrics-aggregator /bin
EXPOSE 9090
# Started by name; presumably /bin is on PATH in the base image — verify.
CMD ["metrics-aggregator"]

View File

@ -1,8 +0,0 @@
# Runtime image for the prebuilt `static-server` binary; nothing is compiled inside this image.
FROM ubuntu:24.04
# Install runtime packages only and drop the apt lists in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libssl3 \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Expects ./target/release/static-server to already exist in the build context.
COPY ./target/release/static-server /bin
EXPOSE 8081
# Started by name; presumably /bin is on PATH in the base image — verify.
CMD ["static-server"]

View File

@ -16,15 +16,7 @@ export default defineConfig([
reactRefresh.configs.vite, reactRefresh.configs.vite,
], ],
languageOptions: { languageOptions: {
ecmaVersion: 2020,
globals: globals.browser, globals: globals.browser,
}, },
rules: {
'react-refresh/only-export-components': [
'warn',
{ allowExportNames: ['useThemeCustomization', 'useThemePreset', 'useRoom', 'useOptionalRoom', 'resetAllThemeVars', 'loadThemeVars', 'applyThemePreset'] },
],
'react-hooks/exhaustive-deps': 'warn',
},
}, },
]) ])

901
gene.md
View File

@ -1,901 +0,0 @@
# Gene 方案
## 这份文档针对什么
这个项目里,`Skill` 已经不是一个抽象概念,而是完整的业务实体:
* 后端有 `project_skill` 持久化模型
* Git 同步会扫描仓库里的 `SKILL.md`
* 聊天构建会把启用的技能注入上下文
* 前端有技能列表、详情、编辑、删除、扫描
* 内建技能模板也已经存在
所以这里不应该“用 Gene 替换 Skill”,而应该是:
* `Skill` 继续负责执行和交付能力
* `Gene` 作为新增元层,负责让技能可演化、可比较、可追踪、可继承
---
## 设计结论
`Gene` 的正确位置不是新的执行系统,也不是 `Skill` 的替代品,而是 `Skill` 的生命周期治理层。
```text
Skill = 可执行内容 + 上下文注入 + 业务交付
Gene = 演化族 + 版本谱系 + 评估记录 + 选择记录
```
更准确地说:
```text
Skill 是可执行的技能内容,负责被扫描、编辑、启用、注入和交付能力。
Gene 是 Skill 的演化族,负责组织一个 Skill 在项目内的版本、来源、谱系、评估和选择记录。
GeneRevision 是 Gene 下的不可变版本节点,绑定到某个 Skill 内容快照。
GeneEvaluation 是对某个 GeneRevision 的评估结果。
GeneSelection 是对当前有效 GeneRevision 的显式选择记录。
```
---
## 项目现状
### Skill 已经落在这些地方
* `libs/models/projects/project_skill.rs`
* 定义了 `project_skill` 实体
* 已包含 `source`
* 已包含 `repo_id`
* 已包含 `commit_sha`
* 已包含 `blob_hash`
* 已包含 `content`
* 已包含 `metadata`
* 已包含 `enabled`
* `libs/git/hook/sync/mod.rs`
* 扫描仓库中的 `SKILL.md`
* 从 frontmatter 解析 `name`、`description`、`license`、`compatibility`
* 用 `commit_sha` 和 `blob_hash` 做增量同步
* `libs/agent/skills/templates.rs`
* 内建技能模板已经编译进程序
* 这些模板本质上是“系统内置 Skill”
* `libs/agent/chat/message_builder.rs`
* 读取项目中启用的技能
* 把技能注入对话上下文
* 和 embedding / perception 一起影响模型行为
* `src/app/project/skills/*`
* 已有技能管理 UI
* 能查看、编辑、删除、扫描技能
---
## 现有 Skill 的问题
当前 `Skill` 已经能用,但还不够“可进化”:
* 只有内容,没有明确的演化关系
* 只有当前状态,没有谱系
* 只有启用/禁用,没有版本选择
* 只有来源信息,没有变体比较
* 只有同步时间和 blob hash,没有“为什么变成这样”的记录
* 有评估空间,但没有评估结果和版本绑定
* 有扫描和编辑能力,但没有可审计的回滚与选择记录
换句话说,现在的 `Skill` 更像“静态资产”,还不是“进化单元”。
---
## 设计原则
### 1. Skill 仍然是唯一执行实体
`Skill` 继续负责:
* 被扫描
* 被编辑
* 被启用或禁用
* 被注入聊天上下文
* 交付实际能力
`Gene` 不直接执行任务,不直接调用工具,不直接参与上下文拼装。
---
### 2. Gene 只管理 Skill 的演化元数据
`Gene` 管理的是:
* 版本
* 来源
* 父子关系
* 变体
* 评估
* 选择
* 淘汰
* 回滚
它不应该成为第二套 `Skill` 系统。
---
### 3. GeneRevision 必须不可变
`Skill` 可以是当前可编辑对象。
`GeneRevision` 是不可变演化节点。一旦创建,不应再修改:
* 内容快照
* 父代关系
* 来源信息
* `commit_sha`
* `blob_hash`
* `content_hash`
* `mutation_reason`
* `mutation_diff`
后续任何内容变化、prompt 变化、工具权限变化、上下文注入规则变化,都应该产生新的 `GeneRevision`。
---
### 4. Evaluation 必须绑定到具体 Revision
评估不是评价一个抽象的 `Gene`,而是评价某个具体版本。
因此:
```text
GeneEvaluation 必须绑定 revision_id。
```
否则同一个 `Gene` 下存在多个版本时,评估结果无法精确归因。
---
### 5. Selection 必须显式记录
如果系统选择了某个版本作为当前有效版本,必须记录:
* 选中了哪个 revision
* 为什么选它
* 谁选的
* 依据什么策略选的
* 什么时候选的
* 是否仍然 active
这能支持审计、回滚和后续自动选择。
---
### 6. SKILL.md 仍然是仓库内 Skill 的事实来源
仓库里的 `SKILL.md` 仍然是 Git 同步的事实来源。
`Gene` 只记录它如何演化,不改变仓库同步的基本语义。
---
## 核心概念
### Skill
`Skill` 是现有业务实体。
它负责:
```text
可执行内容
上下文注入
用户可见编辑
Git 扫描
启用 / 禁用
业务交付
```
### Gene
`Gene` 是某个 Skill 的演化族。
它负责组织这个 Skill 的生命周期:
```text
这个能力从哪里来
经历过哪些版本
有哪些变体
评估结果如何
当前选择哪个版本
哪些版本被淘汰
```
### GeneRevision
`GeneRevision` 是 `Gene` 下的一个不可变版本节点。
它绑定某个 `Skill` 的内容状态,例如:
```text
skill_id
skill_slug
commit_sha
blob_hash
content_hash
content_snapshot_ref
```
### GeneEvaluation
`GeneEvaluation` 是对某个 `GeneRevision` 的评估结果。
它回答:
```text
这个版本好不好?
在哪个数据集上测的?
指标是什么?
是否通过?
成本和延迟如何?
失败样本是什么?
```
### GeneSelection
`GeneSelection` 是对当前有效版本的显式选择记录。
它回答:
```text
当前选中哪个 revision?
为什么选它?
谁选的?
依据什么策略?
是否还在生效?
```
---
## Gene 和 Skill 的关系
可以把关系理解为:
```text
Gene 1 ── has many ── GeneRevision
GeneRevision ── references ── Skill snapshot
GeneRevision ── has many ── GeneEvaluation
Gene ── has one active ── GeneSelection
GeneSelection ── selects ── GeneRevision
Skill ── executes ── actual capability
```
也就是说:
* `Skill` 解决“能不能做”
* `Gene` 解决“该保留哪个版本、为什么保留、怎么变体、怎么传播”
* `GeneRevision` 解决“这个能力在某一刻具体长什么样”
* `GeneEvaluation` 解决“这个版本是否足够好”
* `GeneSelection` 解决“当前应该用哪个版本”
---
## 数据模型
### ProjectGene
```text
ProjectGene
- gene_id
- project_uuid
- skill_id
- skill_slug
- name
- description
- owner
- status
- created_at
- updated_at
```
说明:
* `gene_id` 是 Gene 的唯一标识
* `project_uuid` 绑定项目
* `skill_id` / `skill_slug` 绑定现有 Skill
* `status` 可为 `active`、`archived`、`deprecated`
* `owner` 用于责任归属
---
### ProjectGeneRevision
```text
ProjectGeneRevision
- revision_id
- gene_id
- version
- parent_revision_id
- origin
- source
- skill_id
- skill_slug
- commit_sha
- blob_hash
- content_hash
- content_snapshot_ref
- mutation_reason
- mutation_diff
- created_by
- created_at
```
说明:
* `revision_id` 是内部唯一版本节点
* `version` 是用户可见版本号,不承担唯一性职责
* `parent_revision_id` 表示单父版本关系
* `origin` 表示来源,例如 `git_sync`、`manual_edit`、`builtin_template`、`migration`
* `source` 复用现有 `Skill.source`
* `commit_sha` / `blob_hash` 记录 Git 来源
* `content_hash` 记录内容稳定标识
* `content_snapshot_ref` 用于指向当时的内容快照
* `mutation_reason` 记录为什么产生这个版本
* `mutation_diff` 记录相对父版本的变化
---
### ProjectGeneEvaluation
```text
ProjectGeneEvaluation
- evaluation_id
- gene_id
- revision_id
- eval_name
- evaluator
- dataset_ref
- dataset_version
- metric_name
- score
- threshold
- passed
- sample_count
- failure_count
- latency_ms_avg
- cost
- result_summary
- result_json
- evaluated_at
```
说明:
* `revision_id` 必填
* `score` 不应脱离 `metric_name` 单独解释
* `threshold` 用于判断是否通过
* `sample_count` 和 `failure_count` 用于判断评估可信度
* `result_json` 存放详细结果、失败样本、分项指标等
---
### ProjectGeneSelection
```text
ProjectGeneSelection
- selection_id
- gene_id
- selected_revision_id
- project_uuid
- policy
- reason
- selected_by
- selected_at
- active
```
说明:
* 同一个 `gene_id` 同一时间只能有一个 active selection
* `policy` 可以是 `manual`、`latest_passed`、`best_score`、`stable_low_cost`
* `reason` 用于审计和回滚
* `selected_by` 可以是用户、系统或自动策略
---
## 关于 GeneLineage
MVP 阶段不单独引入 `GeneLineage` 表。
原因是:如果每个版本只有一个父版本,`ProjectGeneRevision.parent_revision_id` 已经足够表达谱系。
```text
MVP:单父版本树
Future:多父 DAG / merge / cross-skill inheritance
```
未来如果需要支持合并、交叉继承或复杂谱系,再引入:
```text
ProjectGeneEdge
- parent_revision_id
- child_revision_id
- edge_type
- mutation_reason
- mutation_diff
```
---
## 关于 Variant
`Variant` 是 Gene 的重要能力,但不一定是 MVP 的独立表。
这些变化都可以先作为新的 `GeneRevision` 表达:
* 更严格的 prompt
* 更短的 prompt
* 不同工具权限
* 不同上下文注入规则
* 不同评估阈值
MVP 中:
```text
不单独引入 ProjectGeneVariant。
所有变体先作为 GeneRevision 表达。
```
当系统需要以下能力时,再引入 `GeneExperiment` / `GeneVariant`:
* A/B 实验
* 并行流量分配
* 实验分组
* 统计显著性
* 多候选版本同时比较
---
## Git 同步流程
当前 Git 同步已经会扫描仓库中的 `SKILL.md`,并使用 `commit_sha` 和 `blob_hash` 做增量同步。
引入 Gene 后,Git 同步流程建议变成:
```text
当 Git 同步发现 SKILL.md 的 blob_hash 变化时:
1. 正常创建或更新 project_skill。
2. 查找该 skill 对应的 project_gene。
3. 如果不存在 project_gene则创建一个。
4. 查找该 gene 下最新的 project_gene_revision。
5. 如果新的 blob_hash / content_hash 不同,则创建新的 GeneRevision。
6. 将上一 revision 设为 parent_revision_id。
7. mutation_reason 默认记录为 git_sync。
8. mutation_diff 记录上一个 SKILL.md 与当前 SKILL.md 的 diff。
9. 不自动改变 selected_revision除非选择策略明确允许。
```
关键约束:
```text
Git sync 可以产生新的 GeneRevision。
Git sync 不应默认切换当前线上选中版本。
```
这样可以避免仓库变更自动导致线上行为漂移。
---
## 手动编辑流程
当用户在 UI 中编辑 `Skill` 内容时:
```text
1. 更新 project_skill。
2. 计算新的 content_hash。
3. 查找对应 project_gene。
4. 创建新的 project_gene_revision。
5. parent_revision_id 指向编辑前的 revision。
6. mutation_reason 记录为 manual_edit。
7. mutation_diff 记录编辑前后的差异。
8. 可选择是否自动把新 revision 标记为 selected。
```
建议 MVP 默认:
```text
手动编辑后创建新 revision,但不自动覆盖 active selection。
由用户显式选择是否启用该 revision。
```
如果产品希望“编辑即生效”,也可以让 selection 同步更新,但必须记录:
```text
policy = manual_edit_auto_select
reason = "User edited skill content"
```
---
## 聊天上下文注入流程
现有流程中,`libs/agent/chat/message_builder.rs` 读取项目中启用的 `Skill`,并把技能注入对话上下文。
引入 Gene 后,这个原则不变:
```text
message_builder 不直接读取 Gene 内容。
message_builder 仍然读取启用的 Skill。
Gene 只影响哪个 Skill revision 被视为推荐版本或当前选中版本。
```
如果未来要让 `GeneSelection` 影响上下文注入,推荐路径是:
```text
GeneSelection
-> resolve selected GeneRevision
-> materialize / update project_skill
-> message_builder reads project_skill
```
不建议:
```text
message_builder
-> read Gene
-> assemble prompt
```
因为这会让 `Gene` 偷偷变成新的执行层。
---
## API 设计
MVP API 可以包括:
```text
GET /projects/:project_uuid/skills/:skill_id/gene
POST /projects/:project_uuid/skills/:skill_id/gene
GET /projects/:project_uuid/genes/:gene_id/revisions
POST /projects/:project_uuid/genes/:gene_id/revisions
GET /projects/:project_uuid/genes/:gene_id/evaluations
POST /projects/:project_uuid/genes/:gene_id/evaluations
GET /projects/:project_uuid/genes/:gene_id/selection
POST /projects/:project_uuid/genes/:gene_id/select
```
选择接口示例:
```json
{
"selected_revision_id": "rev_123",
"policy": "manual",
"reason": "Higher pass rate on regression eval"
}
```
评估写入接口示例:
```json
{
"revision_id": "rev_123",
"eval_name": "skill_regression_eval",
"dataset_ref": "datasets/skill-regression-v1",
"dataset_version": "2026-01-01",
"metric_name": "task_success_rate",
"score": 0.92,
"threshold": 0.85,
"passed": true,
"sample_count": 100,
"failure_count": 8,
"latency_ms_avg": 1200,
"cost": 0.34
}
```
---
## UI 设计
`Gene` 不替代现有技能管理 UI。
推荐把它放在 `Skill Detail` 页面中的一个“演化”标签页。
```text
Skill Detail
- 基本信息
- 内容编辑
- 启用状态
- Evolution / Gene
- 当前选中 revision
- 版本列表
- 父子关系
- 每个版本的 diff
- 每个版本的评估结果
- 选择按钮
- 回滚按钮
```
MVP UI 可以先做只读:
```text
1. 显示 Gene 信息
2. 显示 Revision 列表
3. 显示每个 Revision 的来源、时间、commit_sha、blob_hash
4. 显示相邻 Revision 的 diff
```
第二阶段再加入:
```text
1. 手动选择 revision
2. 回滚 revision
3. 展示评估结果
4. 根据评估结果推荐版本
```
---
## 评估指标
Gene 的核心不是“更像生物学”,而是“更像可验证的演化对象”。
每个 `GeneRevision` 都应该支持评估,例如:
* 任务成功率
* 失败率
* 平均耗时
* 误触发率
* 人工接受率
* 回滚率
* 成本
* 稳定性
* 安全失败数
* 用户满意度
没有评估的 Gene,只是重命名后的 Skill 管理。
---
## 选择策略
如果存在多个 GeneRevision,系统应该能选择更优版本。
MVP 阶段不做自动选择,只做人工选择和可审计回滚。
后续选择规则可以逐步加入:
```text
1. 先看是否通过基础测试
2. 再看近期成功率
3. 再看失败率
4. 再看成本
5. 再看延迟
6. 再看稳定性
7. 再看人工接受率
```
可选策略:
```text
manual
latest_passed
best_score
stable_low_cost
lowest_latency
highest_acceptance_rate
```
自动选择必须满足:
```text
有评估数据
有样本量
有失败分类
有回滚机制
有选择审计记录
```
---
## Migration 策略
对于已有的 `project_skill`,可以执行一次初始化迁移:
```text
对每个 project_skill:
1. 创建 project_gene。
2. 创建初始 project_gene_revision。
3. revision.origin = migration。
4. revision.skill_id = project_skill.id。
5. revision.skill_slug = project_skill.slug。
6. revision.commit_sha = project_skill.commit_sha。
7. revision.blob_hash = project_skill.blob_hash。
8. revision.content_hash = hash(project_skill.content)。
9. revision.content_snapshot_ref = 当前 Skill 内容快照。
10. 创建 active project_gene_selection。
11. selected_revision_id = 初始 revision。
12. policy = migration。
13. reason = "Initial gene selection from existing project_skill"。
```
这样现有 Skill 都可以无损进入 Gene 生命周期模型。
---
## 推荐实现顺序
```text
1. 保持 Skill 执行、扫描、编辑、注入流程不变。
2. 增加 project_gene 和 project_gene_revision 表。
3. 为现有 project_skill 执行一次 migration,每个 Skill 创建一个 Gene 和初始 Revision。
4. 在 Git sync 发现 blob_hash 变化时,自动创建新的 GeneRevision。
5. 在 Skill 详情页增加只读版本历史和 diff 展示。
6. 增加 ProjectGeneEvaluation 表和手动写入 API。
7. 增加 ProjectGeneSelection 表和人工选择 / 回滚能力。
8. 当评估数据稳定后,再做自动选择策略。
9. 最后再考虑 Variant / Experiment / A-B 测试。
```
---
## 落地判断标准
一个能力如果只是:
* 能被扫描
* 能被编辑
* 能被启用或禁用
* 能被注入上下文
* 能交付业务能力
它还是 `Skill`。
一个能力如果还能:
* 被追踪来源
* 被比较版本
* 被记录变体
* 被评估好坏
* 被继承和淘汰
* 被审计选择
* 被安全回滚
它才进入 `Gene` 管理范畴。
---
## 非目标
Gene MVP 不做以下事情:
```text
1. 不替换 project_skill。
2. 不改变 SKILL.md 作为仓库技能事实来源的地位。
3. 不直接参与聊天上下文构建。
4. 不直接执行工具调用。
5. 不自动改写 Skill 内容。
6. 不在没有评估和回滚机制的情况下自动切换线上版本。
7. 不一开始支持复杂遗传算法、交叉、随机变异或自动进化。
8. 不新增一套和 Skill 并列的执行 UI。
```
---
## 风险与约束
### 1. Gene 变成另一个 Skill
风险:
```text
Gene 中也开始存 prompt、工具权限、上下文注入规则,并且聊天时直接读取 Gene。
```
规避:
```text
Gene 不存放执行主内容。
执行内容仍然归 Skill 所有。
GeneRevision 只引用或快照 Skill 的某个状态。
```
---
### 2. 评估绑定到可变 Skill
风险:
```text
评估结果只挂在 skill_id 上,Skill 内容变化后,评估记录失真。
```
规避:
```text
评估必须绑定 revision_id + content_hash / blob_hash。
```
---
### 3. 自动选择过早上线
风险:
```text
没有足够评估数据时自动切换版本,导致线上行为漂移。
```
规避:
```text
MVP 只做人工选择和可审计回滚。
自动选择必须依赖稳定评估和回滚机制。
```
---
### 4. mutation_diff 膨胀
风险:
```text
每个版本都保存完整 diff,长期可能膨胀。
```
规避:
```text
MVP 可直接存 mutation_diff。
后续引入 mutation_diff_ref 或对象存储引用。
```
---
### 5. version 语义不清
风险:
```text
version 同时承担用户展示、唯一标识、Git 版本等多种含义。
```
规避:
```text
revision_id = 系统内部唯一版本节点
version = 用户可见版本号
commit_sha / blob_hash = Git 来源版本标识
content_hash = 内容稳定标识
```
---
## 最终结论
这个项目已经具备 `Skill` 的完整工程闭环。
`Gene` 的正确位置不是替换它,而是补上它缺失的演化层:
```text
Skill 负责落地执行
Gene 负责生命周期治理
GeneRevision 负责不可变版本节点
GeneEvaluation 负责版本质量判断
GeneSelection 负责显式选择和回滚
```
两者结合后,技能系统才从“可配置”变成“可进化”。

View File

@ -2,13 +2,12 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<link href="/logo.png" rel="icon" type="image/svg+xml"/> <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<meta content="width=device-width, initial-scale=1.0" name="viewport"/> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>GitData.AI</title> <title>GitDataAI</title>
<link rel="preload" href="/@fs/node_modules/@fontsource-variable/geist/files/Geist%5Bwght%5D.woff2" as="font" type="font/woff2" crossorigin />
</head> </head>
<body> <body>
<div id="root"></div> <div id="root"></div>
<script src="/src/main.tsx" type="module"></script> <script type="module" src="/src/main.tsx"></script>
</body> </body>
</html> </html>

1
lib.rs
View File

@ -1 +0,0 @@
// Frontend embedding is handled by libs/frontend crate. ci

View File

@ -1,47 +0,0 @@
[package]
name = "agent"
version.workspace = true
edition.workspace = true
authors.workspace = true
description.workspace = true
repository.workspace = true
readme.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
documentation.workspace = true
[lib]
path = "lib.rs"
name = "agent"
[features]
default = ["rig"]
rig = []
[dependencies]
rig-core = { workspace = true, features = ["derive"] }
tokio = { workspace = true }
async-trait = { workspace = true }
qdrant-client = { workspace = true }
sea-orm = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
db = { workspace = true }
config = { path = "../config" }
models = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true, features = ["v7"] }
futures = { workspace = true }
tiktoken-rs = { workspace = true }
once_cell = { workspace = true }
regex = { workspace = true }
tracing = { workspace = true }
metrics = { workspace = true }
rust_decimal = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
utoipa = { workspace = true }
tokio-stream = { workspace = true }
redis = { workspace = true, features = ["tokio-comp"] }
queue = { workspace = true }
[lints]
workspace = true

View File

@ -1,4 +0,0 @@
//! Rig-based agent using rig's built-in Agent with full feature support.
pub mod rig_tool;
pub use rig_tool::{AgentResponse, RigAgentService, StreamChunk};

View File

@ -1,234 +0,0 @@
use futures::Stream;
use futures::StreamExt;
use rig::{
agent::{AgentBuilder, MultiTurnStreamItem},
client::CompletionClient,
completion::Prompt,
streaming::{StreamedAssistantContent, StreamingPrompt},
};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use crate::client::AiClientConfig;
use crate::error::AgentError;
/// Outcome of a completed, non-streaming agent prompt.
#[derive(Debug)]
pub struct AgentResponse {
    /// Text the agent produced for the prompt.
    pub content: String,
    /// Input-token count taken from the response usage.
    pub input_tokens: u64,
    /// Output-token count taken from the response usage.
    pub output_tokens: u64,
}
/// One item emitted on the streaming channel.
#[derive(Debug)]
pub enum StreamChunk {
    /// An incremental piece of assistant text.
    Text(String),
    /// Emitted when the model reports its final response.
    Final {
        /// Concatenation of every `Text` chunk streamed so far.
        content: String,
        /// Input-token count taken from the final response usage.
        input_tokens: u64,
        /// Output-token count taken from the final response usage.
        output_tokens: u64,
    },
}
/// Thin service around rig's agent API, bound to one client configuration and one model.
pub struct RigAgentService {
    /// Configuration used to build a fresh rig client on every call.
    config: AiClientConfig,
    /// Name passed to `completion_model` and to the token counter.
    model_name: String,
}
impl RigAgentService {
/// Creates a service for `model_name` backed by the given client configuration.
pub fn new(config: AiClientConfig, model_name: impl Into<String>) -> Self {
    let model_name = model_name.into();
    Self { config, model_name }
}
/// Sends one prompt to the configured model and waits for the complete reply.
///
/// `system_prompt` becomes the agent preamble and `user_input` is the prompt text.
/// No tools are attached.
///
/// # Errors
/// Any rig prompt error is converted to `AgentError::OpenAi` carrying its message.
pub async fn prompt(
    &self,
    system_prompt: &str,
    user_input: &str,
) -> std::result::Result<AgentResponse, AgentError> {
    let to_agent_error =
        |e: rig::completion::PromptError| AgentError::OpenAi(e.to_string());

    let rig_client = self.config.build_rig_client();
    let completion_model = rig_client.completion_model(&self.model_name);
    let agent = AgentBuilder::new(completion_model)
        .preamble(system_prompt)
        .build();

    // `extended_details` is requested so token usage comes back with the text.
    let reply = agent
        .prompt(user_input)
        .extended_details()
        .await
        .map_err(to_agent_error)?;

    Ok(AgentResponse {
        content: reply.output,
        input_tokens: reply.usage.input_tokens,
        output_tokens: reply.usage.output_tokens,
    })
}
/// Sends one prompt with a tool set attached and waits for the complete reply.
///
/// `max_turns` is applied twice: as the agent's default and on this request.
///
/// # Errors
/// Any rig prompt error is converted to `AgentError::OpenAi` carrying its message.
pub async fn prompt_with_tools(
    &self,
    system_prompt: &str,
    user_input: &str,
    tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>>,
    max_turns: usize,
) -> std::result::Result<AgentResponse, AgentError> {
    let to_agent_error =
        |e: rig::completion::PromptError| AgentError::OpenAi(e.to_string());

    let rig_client = self.config.build_rig_client();
    let completion_model = rig_client.completion_model(&self.model_name);
    let agent = AgentBuilder::new(completion_model)
        .preamble(system_prompt)
        .tools(tools)
        .default_max_turns(max_turns)
        .build();

    // `extended_details` is requested so token usage comes back with the text.
    let reply = agent
        .prompt(user_input)
        .max_turns(max_turns)
        .extended_details()
        .await
        .map_err(to_agent_error)?;

    Ok(AgentResponse {
        content: reply.output,
        input_tokens: reply.usage.input_tokens,
        output_tokens: reply.usage.output_tokens,
    })
}
/// Streams the model's reply to a single prompt.
///
/// A background task forwards the rig stream into a bounded channel (capacity 100):
/// - assistant text becomes `StreamChunk::Text` and is accumulated;
/// - tool calls and tool results are only logged, never forwarded;
/// - the final response becomes `StreamChunk::Final` with the accumulated text and usage;
/// - stream errors are forwarded as `AgentError::OpenAi` and the loop keeps reading.
///
/// The task stops as soon as the returned stream is dropped. Previously send failures
/// were ignored, so the task kept pulling from the model with nobody listening.
///
/// # Errors
/// The outer `Result` is currently always `Ok`; failures arrive as stream items.
pub async fn stream_prompt(
    &self,
    system_prompt: &str,
    user_input: &str,
) -> std::result::Result<
    impl Stream<Item = std::result::Result<StreamChunk, AgentError>>,
    AgentError,
> {
    let client = self.config.build_rig_client();
    let model = client.completion_model(&self.model_name);
    let agent = AgentBuilder::new(model).preamble(system_prompt).build();
    let stream: rig::agent::StreamingResult<_> = agent.stream_prompt(user_input).await;
    let (tx, rx) = mpsc::channel::<std::result::Result<StreamChunk, AgentError>>(100);
    tokio::spawn(async move {
        let mut final_content = String::new();
        tokio::pin!(stream);
        while let Some(item) = stream.next().await {
            // Decide what, if anything, this item sends downstream.
            let outgoing = match item {
                Ok(MultiTurnStreamItem::StreamAssistantItem(
                    StreamedAssistantContent::Text(text),
                )) => {
                    final_content.push_str(&text.text);
                    Some(Ok(StreamChunk::Text(text.text)))
                }
                Ok(MultiTurnStreamItem::StreamAssistantItem(
                    StreamedAssistantContent::ToolCall {
                        tool_call,
                        internal_call_id: _,
                    },
                )) => {
                    let args_str = match &tool_call.function.arguments {
                        serde_json::Value::String(s) => s.clone(),
                        v => serde_json::to_string(v).unwrap_or_default(),
                    };
                    tracing::info!(
                        tool = %tool_call.function.name,
                        args = %args_str,
                        "rig_agent_streaming_tool_call"
                    );
                    None
                }
                Ok(MultiTurnStreamItem::StreamUserItem(
                    rig::streaming::StreamedUserContent::ToolResult { tool_result, .. },
                )) => {
                    tracing::info!(
                        tool_result_id = %tool_result.id,
                        "rig_agent_streaming_tool_result"
                    );
                    None
                }
                Ok(MultiTurnStreamItem::FinalResponse(resp)) => {
                    let usage = resp.usage();
                    Some(Ok(StreamChunk::Final {
                        content: final_content.clone(),
                        input_tokens: usage.input_tokens,
                        output_tokens: usage.output_tokens,
                    }))
                }
                Err(e) => Some(Err(AgentError::OpenAi(e.to_string()))),
                _ => None,
            };
            if let Some(chunk) = outgoing {
                // `send` fails only once the receiver is gone; stop consuming upstream.
                if tx.send(chunk).await.is_err() {
                    break;
                }
            }
        }
    });
    Ok(ReceiverStream::new(rx))
}
/// Streams the model's reply to a prompt with tools attached, over up to `max_turns` turns.
///
/// The request starts from an empty history. A background task forwards the rig stream
/// into a bounded channel (capacity 100):
/// - assistant text becomes `StreamChunk::Text` and is accumulated;
/// - the final response becomes `StreamChunk::Final` with the accumulated text and usage;
/// - stream errors are forwarded as `AgentError::OpenAi` and the loop keeps reading;
/// - every other item (tool calls, tool results, ...) is ignored.
///
/// The task stops as soon as the returned stream is dropped. Previously send failures
/// were ignored, so the task kept driving the multi-turn run with nobody listening.
///
/// # Errors
/// The outer `Result` is currently always `Ok`; failures arrive as stream items.
pub async fn stream_prompt_with_tools(
    &self,
    system_prompt: &str,
    user_input: &str,
    tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>>,
    max_turns: usize,
) -> std::result::Result<
    impl Stream<Item = std::result::Result<StreamChunk, AgentError>>,
    AgentError,
> {
    let client = self.config.build_rig_client();
    let model = client.completion_model(&self.model_name);
    let agent = AgentBuilder::new(model)
        .preamble(system_prompt)
        .tools(tools)
        .default_max_turns(max_turns)
        .build();
    let stream = agent
        .stream_prompt(user_input)
        .with_history(Vec::<rig::completion::Message>::new())
        .multi_turn(max_turns)
        .await;
    let (tx, rx) = mpsc::channel::<Result<StreamChunk, AgentError>>(100);
    tokio::spawn(async move {
        let mut final_content = String::new();
        tokio::pin!(stream);
        while let Some(item) = stream.next().await {
            // Decide what, if anything, this item sends downstream.
            let outgoing = match item {
                Ok(MultiTurnStreamItem::StreamAssistantItem(
                    StreamedAssistantContent::Text(text),
                )) => {
                    final_content.push_str(&text.text);
                    Some(Ok(StreamChunk::Text(text.text)))
                }
                Ok(MultiTurnStreamItem::FinalResponse(resp)) => {
                    let usage = resp.usage();
                    Some(Ok(StreamChunk::Final {
                        content: final_content.clone(),
                        input_tokens: usage.input_tokens,
                        output_tokens: usage.output_tokens,
                    }))
                }
                Err(e) => Some(Err(AgentError::OpenAi(e.to_string()))),
                _ => None,
            };
            if let Some(chunk) = outgoing {
                // `send` fails only once the receiver is gone; stop consuming upstream.
                if tx.send(chunk).await.is_err() {
                    break;
                }
            }
        }
    });
    Ok(ReceiverStream::new(rx))
}
/// Counts the tokens in `text` for this service's model.
///
/// # Errors
/// A counter failure is returned as `AgentError::Internal` carrying its message.
pub fn count_tokens(&self, text: &str) -> Result<usize, AgentError> {
    match crate::tokent::count_text(text, &self.model_name) {
        Ok(token_count) => Ok(token_count),
        Err(e) => Err(AgentError::Internal(e.to_string())),
    }
}
}

View File

@ -1,668 +0,0 @@
//! Billing service — handles user-level and project-level billing, deduction,
//! credit initialization, and error persistence.
//!
//! Architecture:
//! - Each user gets $10 personal balance on signup.
//! - Each project gets $20 balance only if it's the creator's first project,
//! $0 otherwise.
//! - AI usage is deducted from the project balance first; if insufficient,
//! falls through to the user's personal balance.
//! - Monthly quota only applies to pro users (is_pro = true).
//! - If both project and user balance are insufficient, a billing_error
//! record is persisted and an error is returned to the caller.
use db::database::AppDatabase;
use models::agents::model_pricing;
use models::ai::billing_error;
use models::projects::{project, project_billing, project_billing_history};
use models::users::{user_billing, user_billing_history};
use rust_decimal::Decimal;
use sea_orm::*;
use uuid::Uuid;
use crate::error::AgentError;
/// Personal balance granted to every new user: $10.0000.
fn default_user_balance() -> Decimal {
    // Mantissa 100_000 at scale 4 is 10.0000.
    let (mantissa, scale) = (100_000_i64, 4_u32);
    Decimal::new(mantissa, scale)
}
/// Credit granted to a creator's first project: $20.0000.
fn first_project_credit() -> Decimal {
    // Mantissa 200_000 at scale 4 is 20.0000.
    let (mantissa, scale) = (200_000_i64, 4_u32);
    Decimal::new(mantissa, scale)
}
/// Starting balance for any project that is not its creator's first one (no credit).
const SUBSEQUENT_PROJECT_BALANCE: Decimal = Decimal::ZERO;
// Summary of one successfully billed AI call; carried by `BillingResult::Success`.
// NOTE(review): plain `//` comments are used on purpose — `///` docs on a
// `utoipa::ToSchema` type may surface in the generated schema; confirm before converting.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
pub struct BillingRecord {
// Charged amount as f64 (filled from `decimal_to_f64(total_cost)` in this file).
pub cost: f64,
// Currency string read from the model's pricing record.
pub currency: String,
// Token counts exactly as passed in by the caller.
pub input_tokens: i64,
pub output_tokens: i64,
pub deducted_from: String, // "project" or "user"
}
/// Outcome of an attempt to bill an AI call.
#[derive(Debug)]
pub enum BillingResult {
    /// The cost was deducted; the record says how much and from which account.
    Success(BillingRecord),
    /// No eligible account could cover the cost; `message` is the deduction failure text.
    InsufficientBalance { message: String },
}
/// Record AI usage: deduct from project balance first, fall through to user balance.
///
/// Steps:
/// 1. Compute the cost and currency from the model's pricing.
/// 2. Verify the project exists.
/// 3. Try to deduct from the project balance.
/// 4. If that fails for any reason, log the reason and try the user's balance.
/// 5. If the user deduction also fails, persist a `billing_error` record for frontend
///    display and return `InsufficientBalance`.
///
/// The project-deduction failure used to be discarded silently, which hid database
/// errors behind the fallback; it is now logged at debug level before falling back.
///
/// # Errors
/// Returns `AgentError` when pricing lookup fails, the project does not exist, or the
/// billing error cannot be persisted. Insufficient funds is NOT an error: it is
/// reported as `Ok(BillingResult::InsufficientBalance { .. })`.
pub async fn record_ai_usage(
    db: &AppDatabase,
    project_uid: Uuid,
    user_uid: Uuid,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
) -> Result<BillingResult, AgentError> {
    let total_cost = compute_cost(db, model_id, input_tokens, output_tokens).await?;
    let currency = get_currency(db, model_id).await?;

    // Verify project exists
    let _ = project::Entity::find_by_id(project_uid)
        .one(db)
        .await?
        .ok_or_else(|| AgentError::Internal("Project not found".into()))?;

    // Attempt project-level deduction first
    let project_err = match deduct_from_project(
        db,
        project_uid,
        total_cost,
        &currency,
        model_id,
        input_tokens,
        output_tokens,
    )
    .await
    {
        Ok(()) => {
            let cost_f64 = decimal_to_f64(total_cost);
            tracing::info!(
                project_id = %project_uid,
                model_id = %model_id,
                input_tokens, output_tokens,
                cost = %cost_f64,
                currency = %currency,
                deducted_from = "project",
                "ai_usage_recorded"
            );
            return Ok(BillingResult::Success(BillingRecord {
                cost: cost_f64,
                currency,
                input_tokens,
                output_tokens,
                deducted_from: "project".to_string(),
            }));
        }
        Err(reason) => reason,
    };

    // The failure may be a plain shortfall or a database error; keep the reason visible.
    tracing::debug!(
        project_id = %project_uid,
        user_id = %user_uid,
        error = %project_err,
        "project_deduction_failed_falling_back_to_user"
    );

    // Project deduction failed — try user personal balance
    let insufficient_msg = match deduct_from_user(
        db,
        user_uid,
        total_cost,
        &currency,
        project_uid,
        model_id,
        input_tokens,
        output_tokens,
    )
    .await
    {
        Ok(()) => {
            let cost_f64 = decimal_to_f64(total_cost);
            tracing::info!(
                user_id = %user_uid,
                project_id = %project_uid,
                model_id = %model_id,
                input_tokens, output_tokens,
                cost = %cost_f64,
                currency = %currency,
                deducted_from = "user",
                "ai_usage_recorded"
            );
            return Ok(BillingResult::Success(BillingRecord {
                cost: cost_f64,
                currency,
                input_tokens,
                output_tokens,
                deducted_from: "user".to_string(),
            }));
        }
        Err(reason) => reason,
    };

    // Both project and user deduction failed — persist error
    persist_billing_error(
        db,
        "project",
        project_uid,
        "insufficient_balance",
        &insufficient_msg,
        Some(serde_json::json!({
            "user_id": user_uid.to_string(),
            "model_id": model_id.to_string(),
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": decimal_to_f64(total_cost),
            "currency": currency,
        })),
    )
    .await?;

    Ok(BillingResult::InsufficientBalance {
        message: insufficient_msg,
    })
}
/// Bills an AI call directly against the user's own balance; no project is involved.
///
/// On success returns a `BillingRecord` with `deducted_from = "user"`. When the
/// deduction fails, a `billing_error` record scoped to the user is persisted and
/// `InsufficientBalance` is returned with the failure message.
///
/// # Errors
/// Returns `AgentError` when pricing lookup fails or the billing error cannot be persisted.
pub async fn record_user_ai_usage(
    db: &AppDatabase,
    user_uid: Uuid,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
) -> Result<BillingResult, AgentError> {
    let total_cost = compute_cost(db, model_id, input_tokens, output_tokens).await?;
    let currency = get_currency(db, model_id).await?;

    let deduction = deduct_from_user_personal(
        db,
        user_uid,
        total_cost,
        &currency,
        model_id,
        input_tokens,
        output_tokens,
    )
    .await;

    let insufficient_msg = match deduction {
        Err(reason) => reason,
        Ok(()) => {
            let cost_f64 = decimal_to_f64(total_cost);
            tracing::info!(
                user_id = %user_uid,
                model_id = %model_id,
                input_tokens, output_tokens,
                cost = %cost_f64,
                currency = %currency,
                deducted_from = "user",
                scope = "personal",
                "ai_usage_recorded"
            );
            return Ok(BillingResult::Success(BillingRecord {
                cost: cost_f64,
                currency,
                input_tokens,
                output_tokens,
                deducted_from: "user".to_string(),
            }));
        }
    };

    // Deduction failed: record why, so the failure can be displayed later.
    let details = serde_json::json!({
        "user_id": user_uid.to_string(),
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost": decimal_to_f64(total_cost),
        "currency": currency,
        "scope": "personal",
    });
    persist_billing_error(
        db,
        "user",
        user_uid,
        "insufficient_balance",
        &insufficient_msg,
        Some(details),
    )
    .await?;

    Ok(BillingResult::InsufficientBalance {
        message: insufficient_msg,
    })
}
/// Pre-flight check: can the project and user balances together cover an estimated AI call?
///
/// Meant to run before AI processing starts, to avoid wasted compute. A balance that
/// cannot be read counts as zero.
///
/// # Errors
/// Returns `AgentError` when the cost cannot be computed from the model's pricing.
pub async fn check_balance(
    db: &AppDatabase,
    project_uid: Uuid,
    user_uid: Uuid,
    model_id: Uuid,
    estimated_input_tokens: i64,
    estimated_output_tokens: i64,
) -> Result<bool, AgentError> {
    let needed = compute_cost(
        db,
        model_id,
        estimated_input_tokens,
        estimated_output_tokens,
    )
    .await?;

    let from_project = get_project_balance(db, project_uid).await;
    let from_user = get_user_balance(db, user_uid).await;
    let combined = from_project + from_user;

    Ok(combined >= needed)
}
/// Pre-flight check: can the user's personal balance alone cover an estimated AI call?
///
/// A balance that cannot be read counts as zero.
///
/// # Errors
/// Returns `AgentError` when the cost cannot be computed from the model's pricing.
pub async fn check_user_balance(
    db: &AppDatabase,
    user_uid: Uuid,
    model_id: Uuid,
    estimated_input_tokens: i64,
    estimated_output_tokens: i64,
) -> Result<bool, AgentError> {
    let needed = compute_cost(
        db,
        model_id,
        estimated_input_tokens,
        estimated_output_tokens,
    )
    .await?;

    let available = get_user_balance(db, user_uid).await;
    Ok(available >= needed)
}
// ── Initialization ──
/// Creates the billing account for a user, seeded with the default personal balance.
///
/// The account starts in USD, non-pro, with a zero monthly quota and no billing cycle.
/// Intended to be called on user signup / first login.
///
/// # Errors
/// Returns `AgentError::Internal` when the insert fails.
pub async fn initialize_user_billing(db: &AppDatabase, user_uid: Uuid) -> Result<(), AgentError> {
    let created = chrono::Utc::now();
    let account = user_billing::ActiveModel {
        user: Set(user_uid),
        balance: Set(default_user_balance()),
        currency: Set("USD".to_string()),
        is_pro: Set(false),
        monthly_quota: Set(Decimal::ZERO),
        month_used: Set(Decimal::ZERO),
        cycle_start: Set(None),
        cycle_end: Set(None),
        updated_at: Set(created),
        created_at: Set(created),
    };

    if let Err(e) = account.insert(db).await {
        return Err(AgentError::Internal(format!(
            "failed to create user billing: {}",
            e
        )));
    }

    tracing::info!(user_id = %user_uid, balance = "$10", "user_billing_initialized");
    Ok(())
}
/// Initialize a project billing account.
/// Grants $20 only if this is the creator's first project; $0 otherwise.
pub async fn initialize_project_billing(
db: &AppDatabase,
project_uid: Uuid,
creator_uid: Uuid,
) -> Result<(), AgentError> {
// Check how many projects this user has already created
let existing_count = project::Entity::find()
.filter(project::Column::CreatedBy.eq(creator_uid))
.filter(project::Column::Id.ne(project_uid))
.count(db)
.await
.map_err(|e| AgentError::Internal(format!("failed to count user projects: {}", e)))?;
let is_first = existing_count == 0;
let initial_balance = if is_first {
first_project_credit()
} else {
SUBSEQUENT_PROJECT_BALANCE
};
let now = chrono::Utc::now();
project_billing::ActiveModel {
project: Set(project_uid),
balance: Set(initial_balance),
currency: Set("USD".to_string()),
user: Set(Some(creator_uid)),
initial_credit_granted: Set(is_first),
is_pro: Set(false),
monthly_quota: Set(Decimal::ZERO),
month_used: Set(Decimal::ZERO),
cycle_start: Set(None),
cycle_end: Set(None),
updated_at: Set(now),
created_at: Set(now),
}
.insert(db)
.await
.map_err(|e| AgentError::Internal(format!("failed to create project billing: {}", e)))?;
if is_first {
// Record the credit in billing history
project_billing_history::ActiveModel {
uid: Set(Uuid::new_v4()),
project: Set(project_uid),
user: Set(Some(creator_uid)),
amount: Set(first_project_credit()),
currency: Set("USD".to_string()),
reason: Set("first_project_credit".to_string()),
extra: Set(Some(serde_json::json!({
"is_first_project": true,
}))),
created_at: Set(now),
..Default::default()
}
.insert(db)
.await
.map_err(|e| AgentError::Internal(format!("failed to record credit history: {}", e)))?;
}
tracing::info!(
project_id = %project_uid,
creator_id = %creator_uid,
is_first_project = is_first,
balance = if is_first { "$20" } else { "$0" },
"project_billing_initialized"
);
Ok(())
}
// ── Internal helpers ──
/// Computes the cost of a call from the model's most recent pricing record.
///
/// The record with the latest `EffectiveFrom` for `model_id` is used. Its input and
/// output prices are stored as text and parsed into `Decimal`.
///
/// NOTE(review): the fields are named `*_per_1k_tokens` but the arithmetic divides token
/// counts by 1,000,000 — confirm which unit the stored prices actually use.
///
/// # Errors
/// Returns `AgentError::Internal` when no pricing record exists, a price fails to parse,
/// or neither price is positive. Database errors propagate via `?`.
async fn compute_cost(
    db: &AppDatabase,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
) -> Result<Decimal, AgentError> {
    let latest = model_pricing::Entity::find()
        .filter(model_pricing::Column::ModelVersionId.eq(model_id))
        .order_by_desc(model_pricing::Column::EffectiveFrom)
        .one(db)
        .await?;
    let pricing = match latest {
        Some(row) => row,
        None => {
            return Err(AgentError::Internal(
                "No pricing record found for this model. Please configure AI model pricing first."
                    .into(),
            ));
        }
    };

    let input_price: Decimal = pricing
        .input_price_per_1k_tokens
        .parse()
        .map_err(|e| AgentError::Internal(format!("Invalid input price: {}", e)))?;
    let output_price: Decimal = pricing
        .output_price_per_1k_tokens
        .parse()
        .map_err(|e| AgentError::Internal(format!("Invalid output price: {}", e)))?;

    // At least one side must carry a positive price, otherwise usage would bill as free.
    let has_positive_price = input_price > Decimal::ZERO || output_price > Decimal::ZERO;
    if !has_positive_price {
        return Err(AgentError::Internal(
            "Model pricing is not configured or is zero. Please configure non-zero AI model pricing first."
                .into(),
        ));
    }

    // DB stores per-1M-token prices; divide tokens by 1M to compute cost.
    let per_million = Decimal::from(1_000_000);
    let input_cost = (Decimal::from(input_tokens) / per_million) * input_price;
    let output_cost = (Decimal::from(output_tokens) / per_million) * output_price;
    Ok(input_cost + output_cost)
}
/// Returns the currency of the model's most recent pricing record.
///
/// The record with the latest `EffectiveFrom` is selected, the same rule `compute_cost`
/// uses, so cost and currency come from the same row. Without the ordering, a model
/// with several pricing records could report the currency of an arbitrary one.
///
/// # Errors
/// Returns `AgentError::Internal` when the model has no pricing record. Database errors
/// propagate via `?`.
async fn get_currency(db: &AppDatabase, model_id: Uuid) -> Result<String, AgentError> {
    let pricing = model_pricing::Entity::find()
        .filter(model_pricing::Column::ModelVersionId.eq(model_id))
        .order_by_desc(model_pricing::Column::EffectiveFrom)
        .one(db)
        .await?
        .ok_or_else(|| AgentError::Internal("No pricing found".into()))?;
    Ok(pricing.currency)
}
/// Reads the project's current balance.
///
/// Best-effort: a missing account and a failed query both yield zero.
async fn get_project_balance(db: &AppDatabase, project_uid: Uuid) -> Decimal {
    let lookup = project_billing::Entity::find_by_id(project_uid)
        .one(db)
        .await;
    match lookup {
        Ok(Some(account)) => account.balance,
        Ok(None) | Err(_) => Decimal::ZERO,
    }
}
async fn get_user_balance(db: &AppDatabase, user_uid: Uuid) -> Decimal {
user_billing::Entity::find_by_id(user_uid)
.one(db)
.await
.ok()
.flatten()
.map(|b| b.balance)
.unwrap_or(Decimal::ZERO)
}
/// Charges `cost` to the project's billing account inside one transaction.
///
/// The account row is read with an exclusive lock, a negative-amount `ai_usage` history
/// entry is inserted, and the balance is lowered by `cost`. When the balance is smaller
/// than `cost` the transaction is rolled back and an error message is returned.
async fn deduct_from_project(
    db: &AppDatabase,
    project_uid: Uuid,
    cost: Decimal,
    currency: &str,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
) -> Result<(), String> {
    let txn = db
        .begin()
        .await
        .map_err(|e| format!("db txn error: {}", e))?;

    let account = project_billing::Entity::find_by_id(project_uid)
        .lock_exclusive()
        .one(&txn)
        .await
        .map_err(|e| format!("db error: {}", e))?
        .ok_or_else(|| "Project billing account not found".to_string())?;

    let available = account.balance;
    if available < cost {
        txn.rollback().await.ok();
        return Err(format!(
            "Project balance insufficient. Required: {:.4} {}, Available: {:.4} {}",
            cost, currency, available, currency
        ));
    }

    let timestamp = chrono::Utc::now();
    let usage = serde_json::json!({
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "deducted_from": "project",
    });
    let entry = project_billing_history::ActiveModel {
        uid: Set(Uuid::new_v4()),
        project: Set(project_uid),
        user: Set(None),
        amount: Set(-cost),
        currency: Set(currency.to_string()),
        reason: Set("ai_usage".to_string()),
        extra: Set(Some(usage)),
        created_at: Set(timestamp),
        ..Default::default()
    };
    entry
        .insert(&txn)
        .await
        .map_err(|e| format!("failed to insert history: {}", e))?;

    // The new balance is derived from the locked row read above.
    let mut account: project_billing::ActiveModel = account.into();
    account.balance = Set(available - cost);
    account.updated_at = Set(timestamp);
    account
        .update(&txn)
        .await
        .map_err(|e| format!("failed to update balance: {}", e))?;

    txn.commit()
        .await
        .map_err(|e| format!("commit error: {}", e))?;
    Ok(())
}
/// Charges `cost` to the user's billing account on behalf of a project.
///
/// Runs in one transaction: the user's account row is read with an exclusive lock, an
/// `ai_usage_user_fallback` entry is written to the *project* billing history (tagged
/// with the paying user), and the user's balance is lowered by `cost`. When the balance
/// is smaller than `cost` the transaction is rolled back and an error message is returned.
async fn deduct_from_user(
    db: &AppDatabase,
    user_uid: Uuid,
    cost: Decimal,
    currency: &str,
    project_uid: Uuid,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
) -> Result<(), String> {
    let txn = db
        .begin()
        .await
        .map_err(|e| format!("db txn error: {}", e))?;

    let account = user_billing::Entity::find_by_id(user_uid)
        .lock_exclusive()
        .one(&txn)
        .await
        .map_err(|e| format!("db error: {}", e))?
        .ok_or_else(|| "User billing account not found".to_string())?;

    let available = account.balance;
    if available < cost {
        txn.rollback().await.ok();
        return Err(format!(
            "Insufficient balance (project + user). Project: unavailable, User: {:.4} {}. Required: {:.4} {}",
            available, currency, cost, currency
        ));
    }

    let timestamp = chrono::Utc::now();
    let usage = serde_json::json!({
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "deducted_from": "user",
    });
    // The usage is recorded against the project even though the user pays for it.
    let entry = project_billing_history::ActiveModel {
        uid: Set(Uuid::new_v4()),
        project: Set(project_uid),
        user: Set(Some(user_uid)),
        amount: Set(-cost),
        currency: Set(currency.to_string()),
        reason: Set("ai_usage_user_fallback".to_string()),
        extra: Set(Some(usage)),
        created_at: Set(timestamp),
        ..Default::default()
    };
    entry
        .insert(&txn)
        .await
        .map_err(|e| format!("failed to insert history: {}", e))?;

    let mut account: user_billing::ActiveModel = account.into();
    account.balance = Set(available - cost);
    account.updated_at = Set(timestamp);
    account
        .update(&txn)
        .await
        .map_err(|e| format!("failed to update user balance: {}", e))?;

    txn.commit()
        .await
        .map_err(|e| format!("commit error: {}", e))?;
    Ok(())
}
/// Charges `cost` to the user's billing account for personal (non-project) usage.
///
/// Runs in one transaction: the user's account row is read with an exclusive lock, an
/// `ai_usage_personal` entry is written to the user billing history, and the balance is
/// lowered by `cost`. When the balance is smaller than `cost` the transaction is rolled
/// back and an error message (using the account's own currency) is returned.
async fn deduct_from_user_personal(
    db: &AppDatabase,
    user_uid: Uuid,
    cost: Decimal,
    currency: &str,
    model_id: Uuid,
    input_tokens: i64,
    output_tokens: i64,
) -> Result<(), String> {
    let txn = db
        .begin()
        .await
        .map_err(|e| format!("db txn error: {}", e))?;

    let account = user_billing::Entity::find_by_id(user_uid)
        .lock_exclusive()
        .one(&txn)
        .await
        .map_err(|e| format!("db error: {}", e))?
        .ok_or_else(|| "User billing account not found".to_string())?;

    let available = account.balance;
    if available < cost {
        txn.rollback().await.ok();
        return Err(format!(
            "Insufficient balance. User: {:.4} {}. Required: {:.4} {}",
            available, account.currency, cost, account.currency
        ));
    }

    let timestamp = chrono::Utc::now();
    let usage = serde_json::json!({
        "model_id": model_id.to_string(),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "deducted_from": "user",
        "scope": "personal",
    });
    let entry = user_billing_history::ActiveModel {
        uid: Set(Uuid::new_v4()),
        user: Set(user_uid),
        amount: Set(-cost),
        currency: Set(currency.to_string()),
        reason: Set("ai_usage_personal".to_string()),
        extra: Set(Some(usage)),
        created_at: Set(timestamp),
        ..Default::default()
    };
    entry
        .insert(&txn)
        .await
        .map_err(|e| format!("failed to insert user history: {}", e))?;

    let mut account: user_billing::ActiveModel = account.into();
    account.balance = Set(available - cost);
    account.updated_at = Set(timestamp);
    account
        .update(&txn)
        .await
        .map_err(|e| format!("failed to update user balance: {}", e))?;

    txn.commit()
        .await
        .map_err(|e| format!("commit error: {}", e))?;
    Ok(())
}
/// Stores an unresolved billing error record and emits a warning log line.
///
/// # Errors
///
/// Returns [`AgentError::Internal`] when the row cannot be inserted.
pub async fn persist_billing_error(
    db: &AppDatabase,
    scope: &str,
    scope_id: Uuid,
    error_type: &str,
    message: &str,
    details: Option<serde_json::Value>,
) -> Result<(), AgentError> {
    let record = billing_error::ActiveModel {
        id: Set(Uuid::new_v4()),
        scope: Set(scope.to_owned()),
        scope_id: Set(scope_id),
        error_type: Set(error_type.to_owned()),
        message: Set(message.to_owned()),
        details: Set(details),
        // New records always start out unresolved.
        resolved: Set(false),
        created_at: Set(chrono::Utc::now()),
    };

    if let Err(e) = record.insert(db).await {
        return Err(AgentError::Internal(format!(
            "failed to persist billing error: {}",
            e
        )));
    }

    tracing::warn!(scope, %scope_id, error_type, "billing_error_persisted");
    Ok(())
}
/// Converts a `Decimal` to `f64` by rounding to 10 decimal places and re-parsing its
/// textual form; yields `0.0` if that text cannot be parsed as a float.
fn decimal_to_f64(d: Decimal) -> f64 {
    let rendered = d.round_dp(10).to_string();
    match rendered.parse::<f64>() {
        Ok(value) => value,
        Err(_) => 0.0,
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,163 +0,0 @@
use std::pin::Pin;
use config::AppConfig;
use db::cache::AppCache;
use db::database::AppDatabase;
use models::agents::model;
use models::projects::{project, project_context_setting};
use models::repos::repo;
use models::rooms::{room, room_message};
use models::users::user;
use std::collections::HashMap;
use uuid::Uuid;
/// Default maximum number of rounds for tool-call loops (AI → tool → result → AI).
///
/// The previous default of 3 caused frequent silent termination on realistic
/// multi-step queries, hence the much larger value.
pub const DEFAULT_MAX_TOOL_DEPTH: usize = 99;
/// A single chunk from an AI streaming response.
///
/// Values of this type are what a [`StreamCallback`] receives.
#[derive(Debug, Clone)]
pub struct AiStreamChunk {
/// Text payload of this chunk; its meaning depends on `chunk_type`.
pub content: String,
/// Completion flag.
/// NOTE(review): presumably `true` only on the last chunk of a stream — confirm against the producers.
pub done: bool,
/// What kind of content this chunk contains — helps the frontend render
/// thinking, tool calls, and results with different styles.
pub chunk_type: AiChunkType,
/// Structured metadata for tool_call / tool_result events.
/// tool_call: {"tool": "...", "args": {...}}
/// tool_result: {"tool": "...", "status": "ok|error", "result": "..."}
pub metadata: Option<serde_json::Value>,
/// Optional ID of a child process/agent, sent to frontend via SSE.
pub children_id: Option<String>,
}
/// Kind of content carried by a streaming chunk; the frontend uses it to pick a
/// rendering style. The default kind is [`AiChunkType::Answer`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum AiChunkType {
    /// AI reasoning/thinking text before a tool call or answer.
    Thinking,
    /// Final answer text from the AI.
    #[default]
    Answer,
    /// A tool call is being executed (content = tool name + args summary).
    ToolCall,
    /// Tool execution result (content = result or error).
    ToolResult,
}
const THINK_OPEN: &str = "\x3cthinking\x3e";
const THINK_CLOSE: &str = "\x3c/response\x3e";

/// Strips XML-style thinking tags that some models (e.g. DeepSeek-R1) embed in
/// reasoning output, collapses runs of three or more newlines down to two, and trims
/// surrounding whitespace.
///
/// Both the `think` and `thinking` spellings of the open/close tags are removed, along
/// with the `/response` close tag and the truncated fragments (`<thinking` without its
/// `>`, `/response>` without its `<`) that can appear at chunk boundaries.
pub fn normalize_thinking_content(content: &str) -> String {
    // Complete tags are removed before the truncated fragments so that a fragment
    // pattern never eats part of a complete tag and leaves a stray `>` or `<` behind.
    const MARKERS: [&str; 7] = [
        THINK_CLOSE,
        "\x3c/thinking\x3e",
        "\x3c/think\x3e",
        THINK_OPEN,
        "\x3cthink\x3e",
        "\x3cthinking",
        "/response\x3e",
    ];

    let stripped = MARKERS
        .iter()
        .fold(content.to_owned(), |text, marker| text.replace(*marker, ""));

    let mut result = String::with_capacity(stripped.len());
    let mut newline_run = 0usize;
    for ch in stripped.chars() {
        if ch == '\n' {
            newline_run += 1;
            // Keep at most two consecutive newlines (one blank line).
            if newline_run <= 2 {
                result.push(ch);
            }
        } else {
            newline_run = 0;
            result.push(ch);
        }
    }
    result.trim().to_string()
}
/// Boxed callback invoked with each [`AiStreamChunk`] of a streaming response.
///
/// The callback returns a boxed, pinned future that resolves to `()`; both the callback
/// and the future it returns must be `Send` (the callback also `Sync`).
pub type StreamCallback = Box<
dyn Fn(AiStreamChunk) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync,
>;
/// Role an agent plays during an invocation. [`AgentRole::Default`] is the default.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum AgentRole {
    #[default]
    Default,
    Supervisor,
    Researcher,
    Analyst,
    Reviewer,
    Architect,
    Debugger,
    Implementer,
    Tester,
    Security,
}

/// Per-role execution settings. Every `Option` field left as `None` means
/// "no override"; the default profile has role [`AgentRole::Default`], no overrides,
/// and orchestration enabled.
#[derive(Debug, Clone, Default)]
pub struct AgentExecutionProfile {
    pub role: AgentRole,
    pub system_prompt: Option<String>,
    pub temperature: Option<f64>,
    pub max_tokens: Option<i32>,
    pub top_p: Option<f64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    pub max_tool_depth: Option<usize>,
    pub allowed_tools: Option<Vec<String>>,
    pub disable_orchestration: bool,
}
/// Everything an execution path needs to serve one AI invocation: service handles
/// (`db`, `cache`, `config`), the model/project/room/sender rows, the user input with its
/// mentions and history, and the sampling parameters.
#[derive(Clone)]
pub struct AiRequest {
pub db: AppDatabase,
pub cache: AppCache,
pub config: AppConfig,
pub model: model::Model,
pub project: project::Model,
pub context_setting: Option<project_context_setting::Model>,
pub sender: user::Model,
pub room: room::Model,
/// The prompt text sent to the model.
pub input: String,
/// Users and repositories referenced by the request.
pub mention: Vec<Mention>,
pub history: Vec<room_message::Model>,
// NOTE(review): presumably the sequence number up to which `history` was loaded — confirm.
pub history_cutoff_seq: Option<i64>,
// NOTE(review): presumably maps user ids to display names — confirm against the builder.
pub user_names: HashMap<Uuid, String>,
pub temperature: f64,
pub max_tokens: i32,
pub top_p: f64,
pub frequency_penalty: f64,
pub presence_penalty: f64,
pub think: bool,
/// Tool definitions as JSON values; `None` when the request carries no tools.
pub tools: Option<Vec<serde_json::Value>>,
/// Upper bound on tool-call rounds for this request.
pub max_tool_depth: usize,
/// Optional role-specific settings applied to this request.
pub execution_profile: Option<AgentExecutionProfile>,
pub room_preamble: Option<String>,
}
/// An entity referenced by a request's `mention` list: either a user row or a
/// repository row.
#[derive(Clone)]
pub enum Mention {
/// A mentioned user.
User(user::Model),
/// A mentioned repository.
Repo(repo::Model),
}
pub mod agent_profile;
pub mod chat_execution;
pub mod context;
pub mod message_builder;
pub mod nonstreaming_execution;
pub mod orchestrator;
pub mod react_execution;
pub mod service;
pub mod session_recording;
pub mod state;
pub mod streaming_execution;
pub use context::{AiContextSenderType, RoomMessageContext};
pub use service::ChatService;
pub use state::{AgentRuntime, AgentState};

View File

@ -1,317 +0,0 @@
use std::collections::HashMap;
use super::agent_profile::{profile_for_role_name, should_enable_delegation, supervisor_profile};
use super::message_builder::MessageBuilder;
use super::nonstreaming_execution::execute_process;
use super::service::{ProcessResult, StreamResult};
use super::{AiRequest, StreamCallback};
use crate::error::Result;
use crate::tool::call::ToolError;
use crate::tool::registry::ToolRegistry;
use crate::tool::{ToolDefinition, ToolHandler, ToolParam, ToolSchema};
pub async fn execute_orchestrated_process(
request: AiRequest,
message_builder: &MessageBuilder,
tool_registry: &Option<ToolRegistry>,
ai_base_url: Option<String>,
ai_api_key: Option<String>,
) -> Result<ProcessResult> {
if request
.execution_profile
.as_ref()
.is_some_and(|p| p.disable_orchestration)
{
return execute_process(
request,
message_builder,
tool_registry,
ai_base_url,
ai_api_key,
)
.await;
}
let tools = request.tools.clone().unwrap_or_default();
if !should_enable_delegation(&request.input, !tools.is_empty()) {
return execute_process(
request,
message_builder,
tool_registry,
ai_base_url,
ai_api_key,
)
.await;
}
let mut enhanced_registry = tool_registry.clone().unwrap_or_default();
register_call_sub_agent_tool(
&mut enhanced_registry,
&request,
message_builder,
tool_registry,
ai_base_url.clone(),
ai_api_key.clone(),
);
let mut supervisor_request = request.clone();
let profile = supervisor_profile();
supervisor_request.execution_profile = Some(profile.clone());
supervisor_request.tools = Some(enhanced_registry.to_openai_tools());
supervisor_request.temperature = profile.temperature.unwrap_or(request.temperature);
supervisor_request.max_tokens = profile.max_tokens.unwrap_or(request.max_tokens);
supervisor_request.top_p = profile.top_p.unwrap_or(request.top_p);
supervisor_request.frequency_penalty = profile
.frequency_penalty
.unwrap_or(request.frequency_penalty);
supervisor_request.presence_penalty =
profile.presence_penalty.unwrap_or(request.presence_penalty);
execute_process(
supervisor_request,
message_builder,
&Some(enhanced_registry),
ai_base_url,
ai_api_key,
)
.await
}
pub async fn execute_orchestrated_stream(
request: AiRequest,
on_chunk: StreamCallback,
message_builder: &MessageBuilder,
tool_registry: &Option<ToolRegistry>,
ai_base_url: Option<String>,
ai_api_key: Option<String>,
) -> Result<StreamResult> {
if request
.execution_profile
.as_ref()
.is_some_and(|p| p.disable_orchestration)
{
return super::streaming_execution::execute_process_stream(
request,
on_chunk,
message_builder,
tool_registry,
ai_base_url,
ai_api_key,
)
.await;
}
let tools = request.tools.clone().unwrap_or_default();
if !should_enable_delegation(&request.input, !tools.is_empty()) {
return super::streaming_execution::execute_process_stream(
request,
on_chunk,
message_builder,
tool_registry,
ai_base_url,
ai_api_key,
)
.await;
}
let mut enhanced_registry = tool_registry.clone().unwrap_or_default();
register_call_sub_agent_tool(
&mut enhanced_registry,
&request,
message_builder,
tool_registry,
ai_base_url.clone(),
ai_api_key.clone(),
);
let mut supervisor_request = request.clone();
let profile = supervisor_profile();
supervisor_request.execution_profile = Some(profile.clone());
supervisor_request.tools = Some(enhanced_registry.to_openai_tools());
supervisor_request.temperature = profile.temperature.unwrap_or(request.temperature);
supervisor_request.max_tokens = profile.max_tokens.unwrap_or(request.max_tokens);
supervisor_request.top_p = profile.top_p.unwrap_or(request.top_p);
supervisor_request.frequency_penalty = profile
.frequency_penalty
.unwrap_or(request.frequency_penalty);
supervisor_request.presence_penalty =
profile.presence_penalty.unwrap_or(request.presence_penalty);
super::streaming_execution::execute_process_stream(
supervisor_request,
on_chunk,
message_builder,
&Some(enhanced_registry),
ai_base_url,
ai_api_key,
)
.await
}
/// Registers the `call_sub_agent` tool on `registry`.
///
/// The tool takes a `role` and a `task`, builds a sub-request from a clone of `request`
/// (role profile applied, tools filtered through `filter_tools_for_sub_agent`), runs it
/// with `execute_process` against `original_registry`, and returns a JSON object holding
/// the role, the sub-agent's output, and its token counts. A failed run is reported as
/// `ToolError::ExecutionError`.
///
/// Everything the handler needs is cloned up front so the closure owns its captures.
fn register_call_sub_agent_tool(
registry: &mut ToolRegistry,
request: &AiRequest,
message_builder: &MessageBuilder,
original_registry: &Option<ToolRegistry>,
ai_base_url: Option<String>,
ai_api_key: Option<String>,
) {
let captured_request = request.clone();
let captured_message_builder = message_builder.clone();
let captured_original_registry = original_registry.clone();
let captured_base_url = ai_base_url;
let captured_api_key = ai_api_key;
registry.register(
ToolDefinition::new("call_sub_agent")
.description(
"Delegate a task to a specialist sub-agent and receive its output.\n\
Available roles:\n\
- researcher: Gathers facts, evidence, and data. Best for finding information and searching code.\n\
- analyst: Builds explanations, highlights causal links and tradeoffs. Best for reasoning about implications.\n\
- reviewer: Stress-tests proposals, identifies risks and contradictions. Best for quality checks.\n\
- architect: Maps systems, dependencies, boundaries, and design tradeoffs. Best for architecture decisions.\n\
- debugger: Finds root causes, suspect changes, and validation paths. Best for bugs and regressions.\n\
- implementer: Converts requirements into concrete implementation steps. Best for execution planning.\n\
- tester: Designs validation and regression coverage. Best for test strategy.\n\
- security: Reviews auth, data exposure, injection, dependency, and abuse risks. Best for sensitive changes.\n\
Provide a clear, focused task description so the sub-agent knows exactly what to investigate.",
)
.parameters(ToolSchema {
schema_type: "object".into(),
properties: Some({
let mut p = HashMap::new();
p.insert(
"role".into(),
ToolParam {
name: "role".into(),
param_type: "string".into(),
description: Some(
"The sub-agent role to delegate to: researcher, analyst, reviewer, architect, debugger, implementer, tester, or security.".into(),
),
required: true,
properties: None,
items: None,
},
);
p.insert(
"task".into(),
ToolParam {
name: "task".into(),
param_type: "string".into(),
description: Some(
"The specific task or question for the sub-agent. Be precise and focused.".into(),
),
required: true,
properties: None,
items: None,
},
);
p
}),
required: Some(vec!["role".into(), "task".into()]),
}),
ToolHandler::new(move |_ctx, args| {
// Extract owned values from args before async move (avoid borrowing across boundary)
// A missing or non-string `role` falls back to "researcher"; a missing `task` becomes "".
let role = args
.get("role")
.and_then(|v| v.as_str())
.unwrap_or("researcher")
.to_owned();
let task = args
.get("task")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_owned();
let profile = profile_for_role_name(role.as_str());
let mut sub_request = captured_request.clone();
// The sub-agent sees its role, its task, and the original user request for context.
sub_request.input = format!(
"Sub-agent role: {role}\n\nTask:\n{task}\n\nOriginal user request:\n{}\n\nInstructions:\nFocus only on your assigned task. Return concise, evidence-backed findings.",
captured_request.input
);
sub_request.execution_profile = Some(profile.clone());
sub_request.tools = Some(filter_tools_for_sub_agent(
&captured_request.tools,
&profile.allowed_tools,
));
// Profile overrides win; unset values fall back to the captured request.
sub_request.max_tool_depth = profile
.max_tool_depth
.unwrap_or(captured_request.max_tool_depth);
sub_request.temperature = profile.temperature.unwrap_or(captured_request.temperature);
sub_request.max_tokens = profile.max_tokens.unwrap_or(captured_request.max_tokens);
sub_request.top_p = profile.top_p.unwrap_or(captured_request.top_p);
sub_request.frequency_penalty = profile
.frequency_penalty
.unwrap_or(captured_request.frequency_penalty);
sub_request.presence_penalty = profile
.presence_penalty
.unwrap_or(captured_request.presence_penalty);
// Clone captured values for this invocation so the Fn closure retains them
let mb = captured_message_builder.clone();
let sub_registry = captured_original_registry.clone();
let base = captured_base_url.clone();
let key = captured_api_key.clone();
Box::pin(async move {
// Sub-agents run through `execute_process` directly, not through the orchestrator.
let result = execute_process(sub_request, &mb, &sub_registry, base, key).await;
match result {
Ok(r) => Ok(serde_json::json!({
"role": role,
"output": r.content,
"input_tokens": r.input_tokens,
"output_tokens": r.output_tokens,
})),
Err(e) => Err(ToolError::ExecutionError(format!(
"Sub-agent '{}' execution failed: {}",
role, e
))),
}
})
}),
);
}
/// Selects the tool definitions a sub-agent may use.
///
/// `call_sub_agent` is never handed to a sub-agent, which prevents recursive delegation.
/// With a non-empty allow-list (after dropping `call_sub_agent` from it) only tools whose
/// `function.name` appears in the list are kept. With no allow-list, or an empty one,
/// every named tool except `call_sub_agent` is kept. No tools in, no tools out.
fn filter_tools_for_sub_agent(
    original_tools: &Option<Vec<serde_json::Value>>,
    allowed_tools: &Option<Vec<String>>,
) -> Vec<serde_json::Value> {
    const DELEGATION_TOOL: &str = "call_sub_agent";

    /// Reads `function.name` from an OpenAI-style tool definition.
    fn tool_name(tool: &serde_json::Value) -> Option<&str> {
        tool.get("function")?.get("name")?.as_str()
    }

    let Some(tools) = original_tools.as_deref() else {
        return Vec::new();
    };

    let allow_list: Vec<&str> = allowed_tools
        .iter()
        .flatten()
        .map(String::as_str)
        .filter(|name| *name != DELEGATION_TOOL)
        .collect();

    let mut selected = Vec::new();
    for tool in tools {
        let keep = if allow_list.is_empty() {
            // No usable allow-list: keep every named tool except the delegation tool.
            tool_name(tool).is_some_and(|name| name != DELEGATION_TOOL)
        } else {
            // A tool without a readable name is compared as the empty string.
            allow_list.contains(&tool_name(tool).unwrap_or(""))
        };
        if keep {
            selected.push(tool.clone());
        }
    }
    selected
}

View File

@ -1,233 +0,0 @@
use futures::StreamExt;
use models::rooms::room_ai;
use rig::agent::{AgentBuilder, MultiTurnStreamItem};
use rig::client::CompletionClient;
use rig::streaming::{StreamedAssistantContent, StreamingPrompt};
use sea_orm::*;
use uuid::Uuid;
use super::AiRequest;
use super::session_recording::record_ai_session;
use crate::client::AiClientConfig;
use crate::error::{AgentError, Result};
use crate::react::types::Action as ReactAction;
use crate::react::{DEFAULT_SYSTEM_PROMPT, ReactStep};
use crate::tool::{RecordingTool, registry::ToolRegistry};
/// Runs one request through a rig multi-turn streaming agent and reports each step.
///
/// Every tool in `tool_registry` is wrapped in a `RigToolAdapter` + `RecordingTool` and
/// handed to the agent. Stream items are translated into `ReactStep` values (answer text,
/// thoughts, tool actions, tool observations) and passed to `on_chunk` with a running
/// step counter. After the stream ends the session is recorded via `record_ai_session`.
///
/// Returns `(final_content, input_tokens, output_tokens)`, where `final_content` is the
/// concatenation of all streamed answer text and the token counts come from the final
/// response's usage.
///
/// # Errors
///
/// Returns `AgentError::OpenAi` on the first stream error; in that case the function
/// returns before `record_ai_session` is reached.
pub async fn execute_process_react<C, Fut>(
request: &AiRequest,
mut on_chunk: C,
tool_registry: &ToolRegistry,
ai_base_url: Option<String>,
ai_api_key: Option<String>,
room_preamble: Option<&str>,
message_producer: Option<queue::MessageProducer>,
) -> Result<(String, i64, i64)>
where
C: FnMut(ReactStep) -> Fut + Send,
Fut: std::future::Future<Output = ()> + Send,
{
// Missing endpoint falls back to the public OpenAI host; a missing key becomes "".
let base_url = ai_base_url.unwrap_or_else(|| "https://api.openai.com".into());
let api_key = ai_api_key.unwrap_or_default();
let client_config = AiClientConfig::new(api_key).with_base_url(base_url);
let db = request.db.clone();
let cache = request.cache.clone();
let cfg = request.config.clone();
let room_id = request.room.id;
let sender_uid = request.sender.uid;
let project_id = request.project.id;
let ai_model_id = request.model.id;
let ai_model_name = request.model.name.clone();
// Shared across all tool adapters created for this invocation.
let sent_in_turn = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
let session_id = Uuid::now_v7();
let session_start = std::time::Instant::now();
// Best-effort lookup: a query error or a missing row both leave `version_id` as `None`.
let version_id = room_ai::Entity::find()
.filter(room_ai::Column::Room.eq(request.room.id))
.filter(room_ai::Column::Model.eq(request.model.id))
.one(&request.db)
.await
.ok()
.flatten()
.and_then(|r| r.version);
let mut tools: Vec<Box<dyn rig::tool::ToolDyn + 'static>> = Vec::new();
for def in tool_registry.definitions() {
let name = def.name.clone();
// Definitions without a registered handler are skipped.
if let Some(handler) = tool_registry.get(&name) {
let adapter = crate::tool::RigToolAdapter::new(
handler.clone(),
def.clone(),
db.clone(),
cache.clone(),
cfg.clone(),
room_id,
Some(sender_uid),
project_id,
message_producer.clone(),
Some(ai_model_id),
Some(ai_model_name.clone()),
sent_in_turn.clone(),
);
tools.push(Box::new(RecordingTool::new(
Box::new(adapter),
db.clone(),
session_id,
sender_uid,
)));
}
}
let rig_client = client_config.build_rig_client();
let model = rig_client.completion_model(&request.model.name);
// General rules first (strong LLM attention), room context appended after
// so that output-format rules aren't buried behind long room preamble.
let preamble = match room_preamble {
Some(rp) => format!("{}\n{}", DEFAULT_SYSTEM_PROMPT, rp),
None => DEFAULT_SYSTEM_PROMPT.to_string(),
};
let agent = AgentBuilder::new(model)
.preamble(&preamble)
.tools(tools)
.default_max_turns(request.max_tool_depth)
.build();
// The agent is started with an empty history; `request.history` is not passed here.
let stream = agent
.stream_prompt(&request.input)
.with_history(Vec::<rig::completion::Message>::new())
.multi_turn(request.max_tool_depth)
.await;
tokio::pin!(stream);
let mut step_count = 0usize;
let mut final_content = String::new();
let mut total_input_tokens: i64 = 0;
let mut total_output_tokens: i64 = 0;
while let Some(item) = stream.next().await {
match item {
// Answer text: forwarded as a step and accumulated into the returned content.
Ok(MultiTurnStreamItem::StreamAssistantItem(StreamedAssistantContent::Text(text))) => {
step_count += 1;
let t = text.text;
on_chunk(ReactStep::Answer {
step: step_count,
answer: t.clone(),
})
.await;
final_content.push_str(&t);
}
// Complete reasoning block: only its text parts are joined and forwarded.
Ok(MultiTurnStreamItem::StreamAssistantItem(StreamedAssistantContent::Reasoning(
reasoning,
))) => {
let reasoning_text: String = reasoning
.content
.iter()
.filter_map(|c| match c {
rig::completion::message::ReasoningContent::Text { text, .. } => {
Some(text.as_str())
}
_ => None,
})
.collect::<Vec<_>>()
.join("");
if !reasoning_text.is_empty() {
step_count += 1;
on_chunk(ReactStep::Thought {
step: step_count,
thought: reasoning_text,
})
.await;
}
}
// Incremental reasoning: each non-empty delta becomes its own thought step.
Ok(MultiTurnStreamItem::StreamAssistantItem(
StreamedAssistantContent::ReasoningDelta { reasoning, .. },
)) => {
if !reasoning.is_empty() {
step_count += 1;
on_chunk(ReactStep::Thought {
step: step_count,
thought: reasoning,
})
.await;
}
}
Ok(MultiTurnStreamItem::StreamAssistantItem(StreamedAssistantContent::ToolCall {
tool_call,
..
})) => {
step_count += 1;
// Arguments delivered as a JSON-encoded string are decoded; undecodable text becomes Null.
let args: serde_json::Value = match &tool_call.function.arguments {
serde_json::Value::String(s) => {
serde_json::from_str(s).unwrap_or(serde_json::Value::Null)
}
v => v.clone(),
};
on_chunk(ReactStep::Action {
step: step_count,
action: ReactAction::new(&tool_call.function.name, args),
})
.await;
}
Ok(MultiTurnStreamItem::StreamUserItem(
rig::streaming::StreamedUserContent::ToolResult { tool_result, .. },
)) => {
step_count += 1;
let obs = tool_result_content_to_string(&tool_result.content);
on_chunk(ReactStep::Observation {
step: step_count,
observation: obs,
})
.await;
}
// The final response carries the usage figures that are returned to the caller.
Ok(MultiTurnStreamItem::FinalResponse(resp)) => {
let usage = resp.usage();
total_input_tokens = usage.input_tokens as i64;
total_output_tokens = usage.output_tokens as i64;
}
Err(e) => {
let err_msg = format!("rig agent stream error: {}", e);
return Err(AgentError::OpenAi(err_msg));
}
// Any other stream item is ignored.
_ => {}
}
}
let elapsed_ms = session_start.elapsed().as_millis() as i64;
record_ai_session(
&request.cache,
&request.db,
request.project.id,
request.sender.uid,
session_id,
request.room.id,
request.model.id,
version_id.unwrap_or_default(),
total_input_tokens,
total_output_tokens,
elapsed_ms,
)
.await;
Ok((final_content, total_input_tokens, total_output_tokens))
}
/// Joins the text parts of a rig tool result with newlines; non-text parts
/// (such as images) are skipped.
fn tool_result_content_to_string(
    content: &rig::one_or_many::OneOrMany<rig::completion::message::ToolResultContent>,
) -> String {
    use rig::completion::message::ToolResultContent;

    let mut text_parts: Vec<&str> = Vec::new();
    for part in content.iter() {
        if let ToolResultContent::Text(t) = part {
            text_parts.push(t.text.as_str());
        }
    }
    text_parts.join("\n")
}

View File

@ -1,268 +0,0 @@
use super::message_builder::MessageBuilder;
use super::{AiRequest, StreamCallback};
use crate::client::AiClientConfig;
use crate::client::StreamChunk;
use crate::compact::CompactService;
use crate::embed::EmbedService;
use crate::error::Result;
use crate::perception::PerceptionService;
use crate::tool::registry::ToolRegistry;
use queue::MessageProducer;
/// Result from streaming AI response.
pub struct StreamResult {
/// Answer text of the response.
pub content: String,
/// Reasoning text of the response, kept separate from `content`.
pub reasoning_content: String,
/// Token count reported for the input side of the call.
pub input_tokens: i64,
/// Token count reported for the output side of the call.
pub output_tokens: i64,
/// All chunks in arrival order — preserves ReAct multi-cycle ordering.
pub chunks: Vec<StreamChunk>,
}
/// Result from non-streaming AI response.
pub struct ProcessResult {
/// Answer text of the response.
pub content: String,
/// Token count reported for the input side of the call.
pub input_tokens: i64,
/// Token count reported for the output side of the call.
pub output_tokens: i64,
}
/// Service for handling AI chat requests in rooms.
///
/// Holds the AI endpoint settings, the message builder, and an optional base tool
/// registry, and forwards requests to the execution modules.
pub struct ChatService {
/// Base URL of the AI endpoint; `None` until a client config is supplied.
ai_base_url: Option<String>,
/// API key for the AI endpoint; `None` until a client config is supplied.
ai_api_key: Option<String>,
/// Builder passed to the execution functions; the `with_*_service` methods configure it.
message_builder: MessageBuilder,
/// Base tool registry; `None` when no tools have been registered.
tool_registry: Option<ToolRegistry>,
}
impl ChatService {
    /// Creates a service with no AI endpoint, no API key, a fresh [`MessageBuilder`],
    /// and no tool registry. Configure it with the `with_*` builder methods.
    pub fn new() -> Self {
        Self {
            ai_base_url: None,
            ai_api_key: None,
            message_builder: MessageBuilder::new(),
            tool_registry: None,
        }
    }

    /// Copies the base URL and API key out of `config`.
    pub fn with_ai_client_config(mut self, config: AiClientConfig) -> Self {
        self.ai_base_url = config.base_url.clone();
        self.ai_api_key = Some(config.api_key.clone());
        self
    }

    /// Attaches a compaction service to the message builder.
    pub fn with_compact_service(mut self, compact_service: CompactService) -> Self {
        self.message_builder = self.message_builder.with_compact_service(compact_service);
        self
    }

    /// Attaches an embedding service to the message builder.
    pub fn with_embed_service(mut self, embed_service: EmbedService) -> Self {
        self.message_builder = self.message_builder.with_embed_service(embed_service);
        self
    }

    /// Attaches a perception service to the message builder.
    pub fn with_perception_service(mut self, perception_service: PerceptionService) -> Self {
        self.message_builder = self
            .message_builder
            .with_perception_service(perception_service);
        self
    }

    /// Sets the base tool registry.
    pub fn with_tool_registry(mut self, registry: ToolRegistry) -> Self {
        self.tool_registry = Some(registry);
        self
    }

    /// Returns all registered tools as JSON tool definitions (empty when no registry is set).
    pub fn tools(&self) -> Vec<serde_json::Value> {
        self.tool_registry
            .as_ref()
            .map(|r| r.to_openai_tools())
            .unwrap_or_default()
    }

    /// Build a RigToolSet from the registered tool registry.
    ///
    /// This enables using the same tools with `RigAgentService` via rig's native Agent.
    /// The context (db, cache, config, room_id, sender_id) is passed through to each
    /// tool handler at creation time. Returns `None` when no registry is set.
    #[cfg(feature = "rig")]
    pub fn rig_toolset(
        &self,
        db: db::database::AppDatabase,
        cache: db::cache::AppCache,
        config: config::AppConfig,
        room_id: uuid::Uuid,
        sender_id: Option<uuid::Uuid>,
        project_id: uuid::Uuid,
    ) -> Option<crate::RigToolSet> {
        self.tool_registry.as_ref().map(|registry| {
            crate::RigToolSet::from_registry(
                registry,
                db,
                cache,
                config,
                room_id,
                sender_id,
                project_id,
                None,
                None,
                None,
                std::sync::Arc::new(std::sync::Mutex::new(Vec::new())),
            )
        })
    }

    /// Get a reference to the underlying ToolRegistry.
    pub fn tool_registry(&self) -> Option<&ToolRegistry> {
        self.tool_registry.as_ref()
    }

    /// Delegates to the message builder's `build_room_optimized_context_text`.
    pub async fn build_room_optimized_context_text(
        &self,
        request: &AiRequest,
    ) -> Result<(String, Option<i64>)> {
        self.message_builder
            .build_room_optimized_context_text(request)
            .await
    }

    /// Process AI request without streaming (tool-call loop with non-streaming API).
    pub async fn process(&self, request: AiRequest) -> Result<ProcessResult> {
        super::orchestrator::execute_orchestrated_process(
            request,
            &self.message_builder,
            &self.tool_registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }

    /// Process AI request with streaming (tool-call loop with streaming API, incremental chunks).
    pub async fn process_stream(
        &self,
        request: AiRequest,
        on_chunk: StreamCallback,
    ) -> Result<StreamResult> {
        super::orchestrator::execute_orchestrated_stream(
            request,
            on_chunk,
            &self.message_builder,
            &self.tool_registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }

    /// Clones the base registry (or starts from an empty one when none is set) and
    /// merges `room_tools` into it.
    fn merged_registry(&self, room_tools: ToolRegistry) -> ToolRegistry {
        let mut merged = self.tool_registry.clone().unwrap_or_default();
        merged.merge(room_tools);
        merged
    }

    /// Process AI request for room context — direct execution path (bypasses orchestrator).
    ///
    /// Room AI uses a fast single-agent loop: all tools available, no multi-agent delegation.
    /// Merges `room_tools` (send_message, retract_message) into the base registry,
    /// then runs `execute_process` directly.
    pub async fn process_room(
        &self,
        request: AiRequest,
        room_tools: ToolRegistry,
    ) -> Result<ProcessResult> {
        let registry = Some(self.merged_registry(room_tools));
        super::nonstreaming_execution::execute_process(
            request,
            &self.message_builder,
            &registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }

    /// Process AI request for room context with streaming — direct execution path.
    ///
    /// Same as `process_room` but with streaming response. Bypasses orchestrator,
    /// gives the room AI all tools (base + room) for fast single-agent execution.
    pub async fn process_room_stream(
        &self,
        request: AiRequest,
        on_chunk: StreamCallback,
        room_tools: ToolRegistry,
    ) -> Result<StreamResult> {
        let registry = Some(self.merged_registry(room_tools));
        super::streaming_execution::execute_process_stream(
            request,
            on_chunk,
            &self.message_builder,
            &registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
        )
        .await
    }

    /// Process AI request via rig-based ReAct streaming loop.
    ///
    /// # Errors
    ///
    /// Returns `AgentError::Internal` when no tool registry has been set.
    pub async fn process_react<C, Fut>(
        &self,
        request: &AiRequest,
        on_chunk: C,
    ) -> Result<(String, i64, i64)>
    where
        C: FnMut(crate::react::ReactStep) -> Fut + Send,
        Fut: std::future::Future<Output = ()> + Send,
    {
        let Some(registry) = &self.tool_registry else {
            return Err(crate::error::AgentError::Internal(
                "no tool registry registered".into(),
            ));
        };
        super::react_execution::execute_process_react(
            request,
            on_chunk,
            registry,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
            None,
            None,
        )
        .await
    }

    /// Process AI request via rig-based ReAct streaming loop with room-specific tools.
    ///
    /// Merges `room_tools` (e.g. `send_message`, `retract_message`) into the base
    /// tool registry on-the-fly. The `room_preamble` is appended after the default
    /// system prompt to instruct the AI about room communication rules.
    /// `message_producer` enables tools to publish events via the message queue.
    ///
    /// # Errors
    ///
    /// Returns `AgentError::Internal` when no tool registry has been set. Unlike
    /// `process_room`, this path does not fall back to an empty base registry.
    pub async fn process_react_room<C, Fut>(
        &self,
        request: &AiRequest,
        on_chunk: C,
        room_tools: ToolRegistry,
        room_preamble: Option<&str>,
        message_producer: Option<MessageProducer>,
    ) -> Result<(String, i64, i64)>
    where
        C: FnMut(crate::react::ReactStep) -> Fut + Send,
        Fut: std::future::Future<Output = ()> + Send,
    {
        let Some(registry) = &self.tool_registry else {
            return Err(crate::error::AgentError::Internal(
                "no tool registry registered".into(),
            ));
        };
        let mut merged = registry.clone();
        merged.merge(room_tools);
        super::react_execution::execute_process_react(
            request,
            on_chunk,
            &merged,
            self.ai_base_url.clone(),
            self.ai_api_key.clone(),
            room_preamble,
            message_producer,
        )
        .await
    }
}

impl Default for ChatService {
    /// Equivalent to [`ChatService::new`].
    fn default() -> Self {
        Self::new()
    }
}

View File

@ -1,217 +0,0 @@
//! Agent state machine — tracks lifecycle of a single AI agent invocation.
//!
//! States: Idle → Thinking → ToolCall → Thinking → ... → Answering | Error
//! The Thinking ↔ ToolCall cycle repeats until max tool depth or final answer.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// Phase of a single agent invocation.
///
/// `Answering` and `Error` are the terminal phases (see [`AgentState::is_terminal`]).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum AgentState {
    /// No invocation in progress; waiting for input.
    Idle,
    /// The agent is reasoning (may produce thinking chunks).
    Thinking {
        /// When this thinking phase began.
        started_at: DateTime<Utc>,
        /// Tool depth recorded when this phase was entered.
        tool_depth: u32,
    },
    /// The agent is executing a tool call.
    ToolCall {
        /// Name of the tool being executed.
        tool_name: String,
        /// When the tool call began.
        started_at: DateTime<Utc>,
    },
    /// The agent is returning the final answer.
    Answering {
        /// Running size counter for the answer content accumulated so far.
        content_chars: u64,
        /// When answering began.
        started_at: DateTime<Utc>,
    },
    /// The agent hit a non-recoverable error.
    Error { message: String, tool_depth: u32 },
}

impl AgentState {
    /// Returns `true` for the terminal phases `Answering` and `Error`.
    pub fn is_terminal(&self) -> bool {
        match self {
            Self::Answering { .. } | Self::Error { .. } => true,
            Self::Idle | Self::Thinking { .. } | Self::ToolCall { .. } => false,
        }
    }

    /// Returns `true` only while the agent is `Idle`.
    pub fn is_idle(&self) -> bool {
        self == &Self::Idle
    }

    /// Returns the stable lowercase label for the current phase.
    pub fn current_phase(&self) -> &'static str {
        match self {
            Self::Error { .. } => "error",
            Self::Answering { .. } => "answering",
            Self::ToolCall { .. } => "tool_call",
            Self::Thinking { .. } => "thinking",
            Self::Idle => "idle",
        }
    }
}
/// State machine driving the lifecycle transitions of one agent invocation.
pub struct AgentRuntime {
    /// Current lifecycle phase.
    state: AgentState,
    /// Maximum number of tool calls that may be completed per invocation.
    max_tool_depth: u32,
    /// Number of tool calls completed since the last `start_thinking`.
    current_depth: u32,
}

impl AgentRuntime {
    /// Creates an idle runtime that allows at most `max_tool_depth` tool calls.
    pub fn new(max_tool_depth: u32) -> Self {
        Self {
            state: AgentState::Idle,
            max_tool_depth,
            current_depth: 0,
        }
    }

    /// Returns the current state.
    pub fn state(&self) -> &AgentState {
        &self.state
    }

    /// Transition from Idle → Thinking and reset the tool depth to zero.
    ///
    /// The Idle precondition is only checked in debug builds (`debug_assert!`).
    pub fn start_thinking(&mut self) {
        debug_assert!(self.state.is_idle(), "must be Idle to start thinking");
        self.current_depth = 0;
        self.state = AgentState::Thinking {
            started_at: Utc::now(),
            tool_depth: 0,
        };
    }

    /// Transition from Thinking → ToolCall.
    ///
    /// The depth counter is not changed here; it is incremented when the call
    /// completes (see [`AgentRuntime::complete_tool_call`]).
    ///
    /// # Errors
    /// Fails when the current state is not `Thinking`, or when the number of
    /// completed tool calls has already reached `max_tool_depth`.
    pub fn start_tool_call(&mut self, tool_name: String) -> Result<(), &'static str> {
        if !matches!(self.state, AgentState::Thinking { .. }) {
            return Err("must be Thinking to start tool call");
        }
        if self.current_depth >= self.max_tool_depth {
            return Err("max tool depth reached");
        }
        self.state = AgentState::ToolCall {
            tool_name,
            started_at: Utc::now(),
        };
        Ok(())
    }

    /// Transition from ToolCall → Thinking after a tool result, incrementing the depth.
    ///
    /// # Errors
    /// Fails when the current state is not `ToolCall`.
    pub fn complete_tool_call(&mut self) -> Result<(), &'static str> {
        if !matches!(self.state, AgentState::ToolCall { .. }) {
            return Err("must be ToolCall to complete");
        }
        self.current_depth += 1;
        self.state = AgentState::Thinking {
            started_at: Utc::now(),
            tool_depth: self.current_depth,
        };
        Ok(())
    }

    /// Transition to Answering (terminal) with an empty character count.
    ///
    /// No precondition on the previous state is enforced.
    pub fn start_answer(&mut self) {
        self.state = AgentState::Answering {
            content_chars: 0,
            started_at: Utc::now(),
        };
    }

    /// Adds the number of characters in `content` to the running answer count.
    ///
    /// Does nothing unless the runtime is in the `Answering` state.
    pub fn append_answer(&mut self, content: &str) {
        if let AgentState::Answering { content_chars, .. } = &mut self.state {
            // Count Unicode scalar values, not bytes: `str::len` would over-count
            // any non-ASCII text (e.g. CJK or emoji) for a field named `content_chars`.
            *content_chars += content.chars().count() as u64;
        }
    }

    /// Transition to Error (terminal), recording the depth reached so far.
    pub fn fail(&mut self, message: String) {
        self.state = AgentState::Error {
            message,
            tool_depth: self.current_depth,
        };
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a runtime with the given depth limit that has already entered `Thinking`.
    fn thinking_runtime(max_tool_depth: u32) -> AgentRuntime {
        let mut runtime = AgentRuntime::new(max_tool_depth);
        runtime.start_thinking();
        runtime
    }

    #[test]
    fn test_starts_idle() {
        let runtime = AgentRuntime::new(10);
        assert!(runtime.state().is_idle());
        assert_eq!(runtime.state().current_phase(), "idle");
    }

    #[test]
    fn test_idle_to_thinking() {
        let runtime = thinking_runtime(10);
        assert_eq!(runtime.state().current_phase(), "thinking");
        assert!(!runtime.state().is_terminal());
    }

    #[test]
    fn test_thinking_to_tool_call_and_back() {
        let mut runtime = thinking_runtime(10);
        runtime.start_tool_call("search".into()).unwrap();
        assert_eq!(runtime.state().current_phase(), "tool_call");
        runtime.complete_tool_call().unwrap();
        assert_eq!(runtime.state().current_phase(), "thinking");
    }

    #[test]
    fn test_thinking_to_answer() {
        let mut runtime = thinking_runtime(10);
        runtime.start_answer();
        assert_eq!(runtime.state().current_phase(), "answering");
        assert!(runtime.state().is_terminal());
    }

    #[test]
    fn test_append_answer_tracks_chars() {
        let mut runtime = thinking_runtime(10);
        runtime.start_answer();
        runtime.append_answer("hello");
        let AgentState::Answering { content_chars, .. } = runtime.state() else {
            panic!("expected Answering state");
        };
        assert_eq!(*content_chars, 5);
    }

    #[test]
    fn test_error_is_terminal() {
        let mut runtime = thinking_runtime(10);
        runtime.fail("something broke".into());
        assert_eq!(runtime.state().current_phase(), "error");
        assert!(runtime.state().is_terminal());
    }

    #[test]
    fn test_transition_from_wrong_state() {
        let mut runtime = AgentRuntime::new(10);
        // Neither tool-call transition is legal while Idle.
        assert!(runtime.start_tool_call("tool".into()).is_err());
        assert!(runtime.complete_tool_call().is_err());
    }

    #[test]
    fn test_max_depth_rejected() {
        let mut runtime = thinking_runtime(2);
        // Two full tool-call round trips exhaust the limit of 2.
        for tool in ["tool1", "tool2"] {
            runtime.start_tool_call(tool.into()).unwrap();
            runtime.complete_tool_call().unwrap();
        }
        assert!(runtime.start_tool_call("tool3".into()).is_err());
    }
}

View File

@ -1,511 +0,0 @@
use models::projects::project_skill;
use models::rooms::room_ai;
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
use std::pin::Pin;
use std::sync::Arc;
use uuid::Uuid;
use super::message_builder::MessageBuilder;
use super::service::StreamResult;
use super::session_recording::record_ai_session;
use super::{AiChunkType, AiRequest, AiStreamChunk, StreamCallback};
use crate::client::AiClientConfig;
use crate::client::types::{ChatRequestMessage, ToolCall};
use crate::client::{StreamChunk, StreamChunkType, StreamedToolCall, call_stream};
use crate::error::Result;
use crate::perception::{SkillEntry, ToolCallEvent};
use crate::tool::{ToolCall as AgentToolCall, ToolContext, ToolExecutor};
/// Reference-counted stream callback: takes one `AiStreamChunk` and returns a
/// boxed, `Send` future that the caller awaits. Wrapping it in `Arc` lets the
/// same callback be cloned into several closures and helper functions.
type SharedCallback = Arc<
dyn Fn(AiStreamChunk) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync,
>;
/// Runs the streaming chat loop for one AI request, executing tool calls between turns.
///
/// Each iteration streams one model turn through `call_stream`, forwarding answer
/// and reasoning deltas to `on_chunk`. When a turn requests no tools (or tools are
/// disabled) the session is recorded and the final answer is returned. Otherwise the
/// requested tools are executed, their results are appended to the conversation,
/// passive skill contexts are injected, and the loop continues. After
/// `request.max_tool_depth` tool-calling turns the loop stops with a placeholder answer.
///
/// Temperature and max-token settings resolve in priority order: the request's
/// execution profile, then the room's AI config row, then the request's own values.
///
/// # Errors
/// Propagates failures from building the messages, from the room config query,
/// and from `call_stream`.
pub async fn execute_process_stream(
    request: AiRequest,
    on_chunk: StreamCallback,
    message_builder: &MessageBuilder,
    tool_registry: &Option<crate::tool::registry::ToolRegistry>,
    ai_base_url: Option<String>,
    ai_api_key: Option<String>,
) -> Result<StreamResult> {
    let on_chunk: SharedCallback = Arc::from(on_chunk);
    let tools: Vec<serde_json::Value> = request.tools.clone().unwrap_or_default();
    let tools_enabled = !tools.is_empty();
    let max_tool_depth = request.max_tool_depth;
    let mut messages = message_builder.build_messages(&request).await?;

    let room_ai_config = room_ai::Entity::find()
        .filter(room_ai::Column::Room.eq(request.room.id))
        .filter(room_ai::Column::Model.eq(request.model.id))
        .one(&request.db)
        .await?;
    let model_name = request.model.name.clone();

    // Profile overrides room config, which overrides the request defaults.
    let profile = request.execution_profile.as_ref();
    let temperature = profile
        .and_then(|p| p.temperature.map(|v| v as f32))
        .or_else(|| {
            room_ai_config
                .as_ref()
                .and_then(|r| r.temperature.map(|v| v as f32))
        })
        .unwrap_or(request.temperature as f32);
    let max_tokens = profile
        .and_then(|p| p.max_tokens.map(|v| v as u32))
        .or_else(|| {
            room_ai_config
                .as_ref()
                .and_then(|r| r.max_tokens.map(|v| v as u32))
        })
        .unwrap_or(request.max_tokens as u32);

    let mut tool_depth = 0;
    let mut total_input_tokens = 0i64;
    let mut total_output_tokens = 0i64;
    let session_id = Uuid::now_v7();
    let session_start = std::time::Instant::now();
    let version_id = room_ai_config.as_ref().and_then(|r| r.version);
    let config = AiClientConfig::new(ai_api_key.unwrap_or_default())
        .with_base_url(ai_base_url.unwrap_or_else(|| "https://api.openai.com".into()));

    let mut full_content = String::new();
    let mut all_chunks: Vec<StreamChunk> = Vec::new();
    // Tool calls seen while streaming are queued here and surfaced to the
    // client once the turn has finished (see `drain_tool_call_notifications`).
    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<StreamedToolCall>();

    loop {
        let on_chunk_cb = on_chunk.clone();
        let on_chunk_cb2 = on_chunk.clone();
        let tool_tx = Arc::new(tx.clone());
        let response = call_stream(
            &messages,
            &model_name,
            &config,
            temperature,
            max_tokens,
            if tools_enabled { Some(&tools) } else { None },
            None,
            Arc::new(move |delta| {
                on_chunk_cb(AiStreamChunk {
                    content: delta.to_string(),
                    done: false,
                    chunk_type: AiChunkType::Answer,
                    metadata: None,
                    children_id: None,
                })
            }),
            Arc::new(move |delta| {
                on_chunk_cb2(AiStreamChunk {
                    content: delta.to_string(),
                    done: false,
                    chunk_type: AiChunkType::Thinking,
                    metadata: None,
                    children_id: None,
                })
            }),
            Arc::new(move |tc: &StreamedToolCall| {
                let tx = tool_tx.clone();
                let tc_owned = tc.clone();
                Box::pin(async move {
                    // The receiver lives for the whole loop; a send error is ignored.
                    let _ = tx.send(tc_owned);
                }) as Pin<Box<dyn std::future::Future<Output = ()> + Send>>
            }),
        )
        .await?;

        total_input_tokens += response.input_tokens;
        total_output_tokens += response.output_tokens;
        all_chunks.extend(response.chunks.clone());

        let has_tool_calls = tools_enabled && !response.tool_calls.is_empty();
        if !has_tool_calls {
            return handle_final_answer(
                response,
                all_chunks,
                &request,
                session_id,
                version_id,
                total_input_tokens,
                total_output_tokens,
                session_start,
            )
            .await;
        }

        full_content.push_str(&response.content);

        // Echo the assistant turn (with its tool calls) back into the history
        // so the tool results that follow have something to attach to.
        let tool_calls: Vec<ToolCall> = response
            .tool_calls
            .iter()
            .map(|tc| ToolCall {
                id: tc.id.clone(),
                type_: "function".into(),
                function: crate::client::types::ToolCallFunction {
                    name: tc.name.clone(),
                    arguments: tc.arguments.clone(),
                },
            })
            .collect();
        messages.push(ChatRequestMessage::assistant(
            Some(response.content.clone()),
            Some(tool_calls),
        ));

        drain_tool_call_notifications(&mut rx, &on_chunk, &mut all_chunks).await;

        let calls: Vec<AgentToolCall> = response
            .tool_calls
            .iter()
            .map(|tc| AgentToolCall {
                id: tc.id.clone(),
                name: tc.name.clone(),
                arguments: tc.arguments.clone(),
            })
            .collect();
        let tool_messages = execute_streaming_tools(
            &request,
            &calls,
            session_id,
            &on_chunk,
            &mut all_chunks,
            tool_registry,
            message_builder,
        )
        .await;
        messages.extend(tool_messages);

        inject_passive_skills_stream(
            &request,
            message_builder,
            &response.tool_calls,
            &mut messages,
        )
        .await;

        tool_depth += 1;
        if tool_depth >= max_tool_depth {
            let max_depth_text = format!(
                "[AI reached maximum tool depth ({}) — no final answer produced]",
                max_tool_depth
            );
            on_chunk(AiStreamChunk {
                content: max_depth_text.clone(),
                done: true,
                chunk_type: AiChunkType::Answer,
                metadata: None,
                children_id: None,
            })
            .await;
            all_chunks.push(StreamChunk {
                chunk_type: StreamChunkType::Answer,
                content: max_depth_text,
            });
            record_ai_session(
                &request.cache,
                &request.db,
                request.project.id,
                request.sender.uid,
                session_id,
                request.room.id,
                request.model.id,
                version_id.unwrap_or_default(),
                total_input_tokens,
                total_output_tokens,
                session_start.elapsed().as_millis() as i64,
            )
            .await;
            return Ok(StreamResult {
                content: full_content,
                reasoning_content: String::new(),
                // Report the tokens actually consumed, matching both the recorded
                // session above and the normal exit in `handle_final_answer`.
                input_tokens: total_input_tokens,
                output_tokens: total_output_tokens,
                chunks: all_chunks,
            });
        }
    }
}
/// Emits a `ToolCall` chunk for every tool call currently queued on `rx`.
///
/// Each queued call is rendered as `🔧 name(args)`, with the argument text cut
/// at the last character boundary at or before byte 100 and suffixed with `...`
/// when it is longer than 100 bytes. The chunk is sent through `on_chunk` with
/// structured metadata and also appended to `all_chunks`. Returns as soon as the
/// queue is empty or the channel is disconnected; it never waits for new items.
async fn drain_tool_call_notifications(
    rx: &mut tokio::sync::mpsc::UnboundedReceiver<StreamedToolCall>,
    on_chunk: &SharedCallback,
    all_chunks: &mut Vec<StreamChunk>,
) {
    // `try_recv` fails with either Empty or Disconnected; both end the drain.
    while let Ok(call) = rx.try_recv() {
        let shown_args = if call.arguments.len() <= 100 {
            call.arguments.clone()
        } else {
            // The string is longer than 100 bytes, so every index in 0..=100 is
            // in range and index 0 is always a boundary.
            let cut = (0..=100usize)
                .rev()
                .find(|&i| call.arguments.is_char_boundary(i))
                .unwrap_or(100);
            format!("{}...", &call.arguments[..cut])
        };
        let tool_display = format!("🔧 {}({})", call.name, shown_args);

        // Structured arguments for the client; invalid JSON degrades to `{}`.
        let parsed_args =
            serde_json::from_str(&call.arguments).unwrap_or(serde_json::json!({}));
        let metadata = serde_json::json!({
            "tool": call.name,
            "args": parsed_args,
            "display": tool_display.clone(),
        });

        let notification = AiStreamChunk {
            content: tool_display.clone(),
            done: false,
            chunk_type: AiChunkType::ToolCall,
            metadata: Some(metadata),
            children_id: None,
        };
        on_chunk(notification).await;

        all_chunks.push(StreamChunk {
            chunk_type: StreamChunkType::ToolCall,
            content: tool_display,
        });
    }
}
/// Executes the given tool calls concurrently and returns the resulting tool messages.
///
/// One task per call is spawned on a `JoinSet`; each task runs the call through a
/// fresh `ToolExecutor` with its own clone of the tool context. As tasks finish
/// (in completion order, not request order) every result is recorded through the
/// `ToolCallRecorder`, announced to the client via `on_chunk`, appended to
/// `all_chunks`, and converted into tool messages for the conversation.
/// While waiting, an empty `ToolCall` chunk is emitted whenever 10 seconds pass
/// without any task completing.
async fn execute_streaming_tools(
request: &AiRequest,
calls: &[AgentToolCall],
session_id: Uuid,
on_chunk: &SharedCallback,
all_chunks: &mut Vec<StreamChunk>,
tool_registry: &Option<crate::tool::registry::ToolRegistry>,
message_builder: &MessageBuilder,
) -> Vec<ChatRequestMessage> {
let mut tool_messages = Vec::new();
// Build the shared context once; every spawned task receives a clone of it.
let mut ctx = ToolContext::new(
request.db.clone(),
request.cache.clone(),
request.config.clone(),
request.room.id,
Some(request.sender.uid),
)
.with_project(request.project.id);
if let Some(es) = &message_builder.embed_service {
ctx = ctx.with_embed_service(es.clone());
}
if let Some(registry) = tool_registry {
ctx.registry_mut().merge(registry.clone());
}
let recorder =
crate::tool::recorder::ToolCallRecorder::with_session(request.db.clone(), session_id);
let mut join_set = tokio::task::JoinSet::new();
for call in calls {
let call_clone = call.clone();
let mut ctx_clone = ctx.clone();
let sender_uid = request.sender.uid;
let recorder_clone = recorder.clone();
join_set.spawn(async move {
// Timing covers only this call's execution.
let start = std::time::Instant::now();
let executor = ToolExecutor::new();
let res = executor
.execute_batch(vec![call_clone.clone()], &mut ctx_clone)
.await;
(call_clone, res, start.elapsed(), sender_uid, recorder_clone)
});
}
let heartbeat_dur = std::time::Duration::from_secs(10);
while !join_set.is_empty() {
tokio::select! {
Some(res) = join_set.join_next() => {
// NOTE(review): a join error (task panicked or was cancelled) is dropped here,
// so that call produces no tool message at all — confirm this is acceptable.
if let Ok((call, results, elapsed, sender_uid, recorder)) = res {
match results {
Ok(results) => {
for result in &results {
let text = match &result.result { crate::tool::ToolResult::Ok(v) => v.to_string(), crate::tool::ToolResult::Error(msg) => msg.clone() };
// Log preview: cut at the last char boundary at or before byte 300.
let preview = if text.len() > 300 {
let end = text.char_indices().map(|(i, _)| i).take_while(|&i| i <= 300).last().unwrap_or(300);
format!("{}...", &text[..end])
} else { text.clone() };
tracing::debug!("tool_result: {} — {}", call.name, preview);
let is_error = matches!(result.result, crate::tool::ToolResult::Error(_));
let error_msg = match &result.result { crate::tool::ToolResult::Error(msg) => Some(msg.clone()), _ => None };
recorder.record(crate::tool::recorder::ToolCallRecord {
tool_call_id: call.id.clone(),
session_id: recorder.session_id(),
tool_name: call.name.clone(),
caller: sender_uid,
arguments: call.arguments_json().unwrap_or_default(),
status: if is_error { models::ai::ToolCallStatus::Failed } else { models::ai::ToolCallStatus::Success },
execution_time_ms: Some(elapsed.as_millis() as i64),
error_message: error_msg,
error_stack: None,
retry_count: 0
});
}
let success_display = format!("{}", call.name);
let result_preview: Vec<String> = results.iter().map(|r| {
match &r.result { crate::tool::ToolResult::Ok(v) => v.to_string(), crate::tool::ToolResult::Error(msg) => msg.clone() }
}).collect();
// The metadata result is capped at 500 characters; status is "ok" even when
// an individual result is a `ToolResult::Error`.
let metadata = serde_json::json!({
"tool": call.name,
"status": "ok",
"result": result_preview.join("\n").chars().take(500).collect::<String>(),
"display": success_display.clone(),
});
on_chunk(AiStreamChunk {
content: success_display.clone(),
done: false,
chunk_type: AiChunkType::ToolResult,
metadata: Some(metadata),
children_id: Some(call.id.clone()),
}).await;
// NOTE(review): stored as StreamChunkType::ToolCall although a ToolResult variant
// exists and the live chunk above uses AiChunkType::ToolResult — confirm intent.
all_chunks.push(StreamChunk { chunk_type: StreamChunkType::ToolCall, content: success_display });
let msgs = ToolExecutor::to_tool_messages(&results);
tool_messages.extend(msgs);
}
Err(e) => {
// The executor itself failed: record the failure and hand the model an error text.
recorder.record(crate::tool::recorder::ToolCallRecord {
tool_call_id: call.id.clone(),
session_id: recorder.session_id(),
tool_name: call.name.clone(),
caller: sender_uid,
arguments: call.arguments_json().unwrap_or_default(),
status: models::ai::ToolCallStatus::Failed,
execution_time_ms: Some(elapsed.as_millis() as i64),
error_message: Some(e.to_string()),
error_stack: None,
retry_count: 0
});
let err_text = format!("[Tool call failed: {}]", e);
tracing::warn!(tool = %call.name, args = %call.arguments, error = %e, "tool_call_failed");
let err_display = format!("{} (failed)", call.name);
let metadata = serde_json::json!({
"tool": call.name,
"status": "error",
"result": e.to_string(),
"display": err_display.clone(),
});
// NOTE(review): children_id is None here but Some(call.id) on the success path —
// confirm whether failed results should also be linked to their call.
on_chunk(AiStreamChunk {
content: err_display.clone(),
done: false,
chunk_type: AiChunkType::ToolResult,
metadata: Some(metadata),
children_id: None,
}).await;
all_chunks.push(StreamChunk { chunk_type: StreamChunkType::ToolCall, content: err_display });
tool_messages.push(ChatRequestMessage::tool(&call.id, &err_text));
}
}
}
},
// Heartbeat: the sleep is recreated on every loop pass, so this fires only
// after 10 s without any task completing.
_ = tokio::time::sleep(heartbeat_dur) => {
on_chunk(AiStreamChunk { content: String::new(), done: false, chunk_type: AiChunkType::ToolCall, metadata: None, children_id: None }).await;
}
}
}
tool_messages
}
/// Records the finished AI session and packages the last model turn as the result.
///
/// The response content is returned as the result's `content`, but it is not
/// added to `all_chunks`: the incremental deltas already collected there add up
/// to the same text, and appending the full text as well would make
/// `merge_consecutive_blocks` store it twice when persisting.
async fn handle_final_answer(
    response: crate::client::StreamResponse,
    all_chunks: Vec<StreamChunk>,
    request: &AiRequest,
    session_id: Uuid,
    version_id: Option<Uuid>,
    total_input_tokens: i64,
    total_output_tokens: i64,
    session_start: std::time::Instant,
) -> Result<StreamResult> {
    let crate::client::StreamResponse {
        content,
        reasoning_content,
        ..
    } = response;

    let elapsed_ms = session_start.elapsed().as_millis() as i64;
    record_ai_session(
        &request.cache,
        &request.db,
        request.project.id,
        request.sender.uid,
        session_id,
        request.room.id,
        request.model.id,
        version_id.unwrap_or_default(),
        total_input_tokens,
        total_output_tokens,
        elapsed_ms,
    )
    .await;

    Ok(StreamResult {
        content,
        reasoning_content,
        input_tokens: total_input_tokens,
        output_tokens: total_output_tokens,
        chunks: all_chunks,
    })
}
/// Appends system messages for skills that are passively triggered by `tool_calls`.
///
/// Loads the project's enabled skills, adds every built-in skill whose slug is
/// not already present, and asks the passive perception detector about each tool
/// call. Each distinct skill context found is pushed onto `messages` as a system
/// message. If the skill query fails, the function returns without changing
/// `messages` (best effort).
async fn inject_passive_skills_stream(
    request: &AiRequest,
    message_builder: &MessageBuilder,
    tool_calls: &[StreamedToolCall],
    messages: &mut Vec<ChatRequestMessage>,
) {
    let query = project_skill::Entity::find()
        .filter(project_skill::Column::ProjectUuid.eq(request.project.id))
        .filter(project_skill::Column::Enabled.eq(true))
        .all(&request.db);
    let Ok(project_skills) = query.await else {
        return;
    };

    let mut skill_entries: Vec<SkillEntry> = project_skills
        .into_iter()
        .map(|skill| SkillEntry {
            slug: skill.slug,
            name: skill.name,
            description: skill.description,
            content: skill.content,
        })
        .collect();

    // Project skills take precedence: a built-in is only added when its slug is unused.
    for built_in in crate::skills::all_skills() {
        let already_present = skill_entries.iter().any(|s| s.slug == built_in.slug);
        if already_present {
            continue;
        }
        skill_entries.push(SkillEntry {
            slug: built_in.slug.to_string(),
            name: built_in.name.to_string(),
            description: Some(built_in.description.to_string()),
            content: built_in.content.clone(),
        });
    }

    let tool_events: Vec<ToolCallEvent> = tool_calls
        .iter()
        .map(|tc| ToolCallEvent {
            tool_name: tc.name.clone(),
            arguments: tc.arguments.clone(),
        })
        .collect();

    let detector = &message_builder.perception_service.passive;
    let mut contexts = Vec::new();
    for event in &tool_events {
        if let Some(found) = detector.detect(event, &skill_entries) {
            MessageBuilder::push_unique_skill_context(&mut contexts, found);
        }
    }

    for skill_context in contexts {
        messages.push(skill_context.to_system_message());
    }
}

View File

@ -1,831 +0,0 @@
//! Unified AI client with built-in retry, token tracking, and session recording.
//!
//! Uses rig-core as the underlying AI provider library.
pub mod types;
pub use types::{ChatRequestMessage, ToolCall as ClientToolCall};
use std::pin::Pin;
use std::sync::Arc;
use std::time::Instant;
use uuid::Uuid;
use crate::error::{AgentError, Result};
use futures::StreamExt;
use rig::completion::message::{AssistantContent, Message as RigMessage};
use rig::completion::{CompletionModel, GetTokenUsage, ToolDefinition};
use rig::one_or_many::OneOrMany;
use rig::prelude::CompletionClient;
use rig::providers::openai;
/// Stateless handle for AI call metrics; every method bumps `metrics` crate counters.
#[derive(Debug, Clone, Default)]
pub struct AiMetrics;

impl AiMetrics {
    /// Creates the (zero-sized) metrics handle.
    pub fn new() -> Self {
        Self
    }

    /// Counts one successful AI call.
    ///
    /// Token counters are only incremented for strictly positive values, and the
    /// function-call counter only when `has_function_call` is set.
    pub fn record_success(&self, input_tokens: i64, output_tokens: i64, has_function_call: bool) {
        metrics::counter!("ai_calls_total").increment(1);
        metrics::counter!("ai_calls_success").increment(1);

        // `try_from` rejects negatives; the guard additionally skips zero.
        match u64::try_from(input_tokens) {
            Ok(tokens) if tokens > 0 => {
                metrics::counter!("ai_input_tokens_total").increment(tokens);
            }
            _ => {}
        }
        match u64::try_from(output_tokens) {
            Ok(tokens) if tokens > 0 => {
                metrics::counter!("ai_output_tokens_total").increment(tokens);
            }
            _ => {}
        }

        if has_function_call {
            metrics::counter!("ai_function_calls_total").increment(1);
        }
    }

    /// Counts one failed AI call.
    pub fn record_failure(&self) {
        metrics::counter!("ai_calls_total").increment(1);
        metrics::counter!("ai_calls_failure").increment(1);
    }
}
/// Connection settings for the AI client.
#[derive(Clone)]
pub struct AiClientConfig {
    /// API key sent to the provider.
    pub api_key: String,
    /// Provider base URL; `None` means the default OpenAI endpoint.
    pub base_url: Option<String>,
}

impl AiClientConfig {
    /// Creates a config with the given key and no explicit base URL.
    pub fn new(api_key: String) -> Self {
        Self {
            api_key,
            base_url: None,
        }
    }

    /// Returns the config with `base_url` set (builder style).
    pub fn with_base_url(self, base_url: impl Into<String>) -> Self {
        Self {
            base_url: Some(base_url.into()),
            ..self
        }
    }

    /// Build a rig OpenAI client from this config.
    ///
    /// Falls back to `https://api.openai.com` when no base URL is configured.
    ///
    /// # Panics
    /// Panics if the rig client builder fails.
    pub fn build_rig_client(&self) -> openai::Client {
        let endpoint = self
            .base_url
            .as_deref()
            .unwrap_or("https://api.openai.com");
        openai::Client::builder()
            .api_key(&self.api_key)
            .base_url(endpoint)
            .build()
            .expect("Failed to build rig OpenAI client")
    }
}
/// Result of one non-streaming AI call, including usage statistics.
#[derive(Debug, Clone)]
pub struct AiCallResponse {
    /// Concatenated text content of the response.
    pub content: String,
    /// Prompt tokens reported by the provider.
    pub input_tokens: i64,
    /// Completion tokens reported by the provider.
    pub output_tokens: i64,
    /// Wall-clock duration of the successful attempt, in milliseconds.
    pub latency_ms: i64,
    /// Tool calls requested by the model.
    pub tool_calls: Vec<ClientToolCall>,
    /// Names of the tools the model called.
    pub tool_calls_finished: Vec<String>,
}

impl AiCallResponse {
    /// Returns the sum of input and output tokens.
    pub fn total_tokens(&self) -> i64 {
        let Self {
            input_tokens,
            output_tokens,
            ..
        } = self;
        *input_tokens + *output_tokens
    }
}
/// Bookkeeping for one retry loop: attempts made so far and the backoff limits.
#[derive(Debug)]
struct RetryState {
    /// Number of retries already performed.
    attempt: u32,
    /// Maximum number of retries allowed.
    max_retries: u32,
    /// Upper bound for any single backoff, in milliseconds.
    max_backoff_ms: u64,
}

impl RetryState {
    /// Starts a fresh retry budget with a 5000 ms backoff cap.
    fn new(max_retries: u32) -> Self {
        Self {
            max_retries,
            attempt: 0,
            max_backoff_ms: 5000,
        }
    }

    /// Returns `true` while the retry budget is not exhausted.
    fn should_retry(&self) -> bool {
        self.max_retries > self.attempt
    }

    /// Computes the delay before the next retry.
    ///
    /// The base delay is `500 ms * 2^attempt` (exponent capped at 5), limited to
    /// `max_backoff_ms`. A random draw in `0..=base` minus `base / 2` (saturating
    /// at zero) is added as jitter, and the total is capped again.
    fn backoff_duration(&self) -> std::time::Duration {
        let cap_ms = self.max_backoff_ms;
        let shift = self.attempt.min(5);
        let base_ms = 500u64.saturating_mul(1u64 << shift).min(cap_ms);
        let half_base = base_ms / 2;

        // The draw range is 0..=base_ms; subtracting half of the base means
        // roughly half of the draws contribute no jitter at all.
        let draw = fastrand_u64(base_ms + 1);
        let jitter = draw.saturating_sub(half_base);

        let delay_ms = base_ms.saturating_add(jitter).min(cap_ms);
        std::time::Duration::from_millis(delay_ms)
    }

    /// Registers that one more retry has been performed.
    fn next(&mut self) {
        self.attempt += 1;
    }
}
/// Returns a pseudo-random value in `0..n` from a process-wide generator.
///
/// The generator state advances as `state * 6364136223846793005 + 1` (wrapping)
/// and the result is the new state modulo `n`. For `n <= 1` the function
/// returns 0 without advancing the state. Intended for retry jitter only.
fn fastrand_u64(n: u64) -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};

    const MULTIPLIER: u64 = 6364136223846793005;
    static STATE: AtomicU64 = AtomicU64::new(0x193_667_6a_5e_7c_57);

    if n <= 1 {
        return 0;
    }

    let advance = |state: u64| state.wrapping_mul(MULTIPLIER).wrapping_add(1);

    // `fetch_update` retries the compare-exchange internally and hands back the
    // value it replaced; the closure never declines, so both arms carry it.
    let replaced = match STATE.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |state| {
        Some(advance(state))
    }) {
        Ok(previous) | Err(previous) => previous,
    };

    advance(replaced) % n
}
/// Decides whether `err` looks transient enough to retry.
///
/// The check is a case-sensitive substring search over the error's display
/// text for known network, rate-limit and 5xx markers.
fn is_retryable_error(err: &AgentError) -> bool {
    const RETRYABLE_MARKERS: &[&str] = &[
        "connection refused",
        "connection timed out",
        "network error",
        "dns error",
        "error sending request",
        "Http client error",
        "rate_limit",
        "rate limit",
        "429",
        "500",
        "502",
        "503",
        "504",
        "internal_server_error",
        "service_unavailable",
        "gateway_timeout",
        "bad_gateway",
    ];

    let rendered = err.to_string();
    RETRYABLE_MARKERS
        .iter()
        .any(|marker| rendered.contains(*marker))
}
/// Process-wide metrics handle, created on first use.
static AI_METRICS: std::sync::OnceLock<AiMetrics> = std::sync::OnceLock::new();

/// Returns the shared [`AiMetrics`] instance, initializing it on the first call.
fn ai_metrics() -> &'static AiMetrics {
    AI_METRICS.get_or_init(AiMetrics::default)
}
// ── Type conversions ─────────────────────────────────────────────────────────
pub(crate) fn to_rig_message(msg: &ChatRequestMessage) -> RigMessage {
match msg.role.as_str() {
"system" => {
// System messages are handled via preamble(), not passed as messages.
// We still need to return a valid RigMessage variant.
RigMessage::user(msg.content.as_deref().unwrap_or(""))
}
"user" => RigMessage::user(msg.content.as_deref().unwrap_or("")),
"assistant" => {
let mut parts: Vec<AssistantContent> = Vec::new();
if let Some(ref content) = msg.content {
if !content.is_empty() {
parts.push(AssistantContent::text(content));
}
}
if let Some(ref tool_calls) = msg.tool_calls {
for tc in tool_calls {
// GLM may return empty tool call IDs — fall back to a generated UUID.
let id = if tc.id.is_empty() {
Uuid::new_v4().to_string()
} else {
tc.id.clone()
};
parts.push(AssistantContent::tool_call_with_call_id(
&id,
id.clone(),
&tc.function.name,
serde_json::from_str(&tc.function.arguments)
.unwrap_or(serde_json::Value::Null),
));
}
}
if parts.is_empty() {
RigMessage::assistant("")
} else if parts.len() == 1 {
// Single part — use simpler constructors
match parts.pop().unwrap() {
AssistantContent::Text(t) => RigMessage::assistant(t.text),
ac => RigMessage::Assistant {
id: None,
content: OneOrMany::one(ac),
},
}
} else {
let content = OneOrMany::many(parts).expect("non-empty parts");
RigMessage::Assistant { id: None, content }
}
}
"tool" | "function" => {
let id = msg.tool_call_id.as_deref().unwrap_or("unknown").to_string();
let call_id = msg.tool_call_id.clone().or_else(|| Some(id.clone()));
let content = msg.content.as_deref().unwrap_or("");
RigMessage::tool_result_with_call_id(id, call_id, content)
}
"developer" => {
// Developer role maps to user/system in rig
RigMessage::user(msg.content.as_deref().unwrap_or(""))
}
_ => RigMessage::user(msg.content.as_deref().unwrap_or("")),
}
}
/// Converts an OpenAI-style tool JSON object into a rig `ToolDefinition`.
///
/// Reads `function.name`, `function.description` and `function.parameters`.
/// Returns `None` when `function.name` is missing or not a string; a missing
/// description becomes an empty string and missing parameters become `{}`.
fn to_rig_tool_def(tool_json: &serde_json::Value) -> Option<ToolDefinition> {
    let function = tool_json.get("function")?;

    let name = function.get("name")?.as_str()?.to_owned();

    let description = match function.get("description").and_then(|d| d.as_str()) {
        Some(text) => text.to_owned(),
        None => String::new(),
    };

    let parameters = match function.get("parameters") {
        Some(schema) => schema.clone(),
        None => serde_json::json!({}),
    };

    Some(ToolDefinition {
        name,
        description,
        parameters,
    })
}
// ── Call helpers ─────────────────────────────────────────────────────────────
/// Performs a single non-streaming completion request against `model`.
///
/// The content of the first `system` message is used as the preamble; all
/// `system` messages are removed from the message list, so any further system
/// messages are not sent. Temperature, max tokens, tools and tool choice are
/// only set on the request when provided (tools only when at least one tool
/// definition could be converted).
///
/// Returns `(content, input_tokens, output_tokens, tool_calls, tool_names)`,
/// where `content` is the concatenation of all text parts and `tool_names`
/// lists the function name of each returned tool call. Reasoning and image
/// parts of the response are ignored.
///
/// # Errors
/// A failed request is returned as `AgentError::OpenAi` carrying the error text.
async fn do_completion<M>(
model: &M,
messages: &[ChatRequestMessage],
temperature: Option<f64>,
max_tokens: Option<u32>,
tools: Option<&[serde_json::Value]>,
tool_choice: Option<&str>,
) -> Result<(String, u64, u64, Vec<ClientToolCall>, Vec<String>)>
where
M: CompletionModel<Client = openai::Client>,
{
// Only the first system message contributes to the preamble.
let preamble = messages
.iter()
.find(|m| m.role == "system")
.and_then(|m| m.content.as_deref())
.unwrap_or("")
.to_string();
let non_system: Vec<RigMessage> = messages
.iter()
.filter(|m| m.role != "system")
.map(to_rig_message)
.collect();
// Tool JSON that cannot be converted (no function name) is silently skipped.
let tool_defs: Vec<ToolDefinition> = tools
.map(|ts| ts.iter().filter_map(to_rig_tool_def).collect())
.unwrap_or_default();
// NOTE(review): the request is created with an empty prompt and the whole
// conversation is supplied via `messages` — confirm how rig treats the empty prompt.
let mut builder = model.completion_request("");
if !preamble.is_empty() {
builder = builder.preamble(preamble);
}
if !non_system.is_empty() {
builder = builder.messages(non_system);
}
if let Some(t) = temperature {
builder = builder.temperature(t);
}
if let Some(mt) = max_tokens {
builder = builder.max_tokens(mt as u64);
}
if !tool_defs.is_empty() {
builder = builder.tools(tool_defs);
}
// Only set tool_choice when explicitly provided (mirrors call_stream_once logic)
if let Some(tc) = tool_choice {
match tc {
"none" => {
builder = builder.tool_choice(rig::completion::message::ToolChoice::None);
}
"auto" => {
builder = builder.tool_choice(rig::completion::message::ToolChoice::Auto);
}
// Any other value is treated as the name of the one function to force.
s => {
builder = builder.tool_choice(rig::completion::message::ToolChoice::Specific {
function_names: vec![s.to_string()],
});
}
}
}
let response = builder
.send()
.await
.map_err(|e| AgentError::OpenAi(e.to_string()))?;
let mut content = String::new();
let mut tool_names: Vec<String> = Vec::new();
let mut tool_calls: Vec<ClientToolCall> = Vec::new();
for item in response.choice {
match item {
AssistantContent::Text(t) => {
content.push_str(&t.text);
}
AssistantContent::ToolCall(tc) => {
tool_names.push(tc.function.name.clone());
tool_calls.push(ClientToolCall {
id: tc.id,
type_: "function".into(),
function: types::ToolCallFunction {
name: tc.function.name,
// Arguments are re-serialized to a JSON string; "{}" on serialization failure.
arguments: serde_json::to_string(&tc.function.arguments)
.unwrap_or_else(|_| "{}".to_string()),
},
});
}
// Reasoning and image parts are not surfaced by this helper.
AssistantContent::Reasoning(_) => {}
AssistantContent::Image(_) => {}
}
}
let input_tokens = response.usage.input_tokens;
let output_tokens = response.usage.output_tokens;
Ok((content, input_tokens, output_tokens, tool_calls, tool_names))
}
// ── Public API ───────────────────────────────────────────────────────────────
/// Calls the model with default parameters (no temperature, token limit or tools),
/// retrying transient failures.
///
/// Up to `max_retries` retries (default 3) are made for errors accepted by
/// `is_retryable_error`, sleeping for the computed backoff between attempts.
/// One success or one failure metric is recorded per call, not per attempt.
///
/// # Errors
/// Returns the last error when it is not retryable or the retry budget is spent.
pub async fn call_with_retry(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    max_retries: Option<u32>,
) -> Result<AiCallResponse> {
    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
    let mut state = RetryState::new(max_retries.unwrap_or(3));

    loop {
        // Latency is measured per attempt, so backoff sleeps are not included.
        let start = Instant::now();
        let err = match do_completion(&model, messages, None, None, None, None).await {
            Ok((content, input_tokens, output_tokens, tool_calls, tool_names)) => {
                let latency_ms = start.elapsed().as_millis() as i64;
                let input_tokens = input_tokens as i64;
                let output_tokens = output_tokens as i64;
                ai_metrics().record_success(input_tokens, output_tokens, !tool_names.is_empty());
                return Ok(AiCallResponse {
                    content,
                    input_tokens,
                    output_tokens,
                    latency_ms,
                    tool_calls,
                    tool_calls_finished: tool_names,
                });
            }
            Err(err) => err,
        };

        if !(state.should_retry() && is_retryable_error(&err)) {
            ai_metrics().record_failure();
            return Err(err);
        }

        let duration = state.backoff_duration();
        tracing::warn!(
            attempt = state.attempt + 1,
            max_retries = state.max_retries,
            backoff_ms = duration.as_millis() as u64,
            model = %model_name,
            error = %err,
            "ai_call_retry"
        );
        tokio::time::sleep(duration).await;
        state.next();
    }
}
/// Calls the model with explicit sampling parameters and optional tools,
/// retrying transient failures.
///
/// `temperature` and `max_tokens` are always applied; `tools` and `tool_choice`
/// are passed through when provided. Up to `max_retries` retries (default 3) are
/// made for errors accepted by `is_retryable_error`, with a backoff sleep
/// between attempts. One success or failure metric is recorded per call.
///
/// # Errors
/// Returns the last error when it is not retryable or the retry budget is spent.
pub async fn call_with_params(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    temperature: f32,
    max_tokens: u32,
    max_retries: Option<u32>,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
) -> Result<AiCallResponse> {
    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
    let mut state = RetryState::new(max_retries.unwrap_or(3));

    loop {
        // Latency is measured per attempt, so backoff sleeps are not included.
        let start = Instant::now();
        let attempt = do_completion(
            &model,
            messages,
            Some(temperature as f64),
            Some(max_tokens),
            tools,
            tool_choice,
        )
        .await;

        let err = match attempt {
            Ok((content, input_tokens, output_tokens, tool_calls, tool_names)) => {
                let latency_ms = start.elapsed().as_millis() as i64;
                let input_tokens = input_tokens as i64;
                let output_tokens = output_tokens as i64;
                ai_metrics().record_success(input_tokens, output_tokens, !tool_names.is_empty());
                return Ok(AiCallResponse {
                    content,
                    input_tokens,
                    output_tokens,
                    latency_ms,
                    tool_calls,
                    tool_calls_finished: tool_names,
                });
            }
            Err(err) => err,
        };

        if !(state.should_retry() && is_retryable_error(&err)) {
            ai_metrics().record_failure();
            return Err(err);
        }

        let duration = state.backoff_duration();
        tracing::warn!(
            attempt = state.attempt + 1,
            max_retries = state.max_retries,
            backoff_ms = duration.as_millis() as u64,
            model = %model_name,
            error = %err,
            "ai_call_retry"
        );
        tokio::time::sleep(duration).await;
        state.next();
    }
}
/// A tool call extracted from a streaming response, with its arguments
/// accumulated into a single string.
#[derive(Debug, Clone)]
pub struct StreamedToolCall {
/// Tool call ID
pub id: String,
/// Tool function name
pub name: String,
/// Accumulated JSON arguments string (kept as text, not parsed)
pub arguments: String,
}
/// Kind of a chunk in the streaming response; a sequence of chunks preserves
/// arrival order across the different kinds.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StreamChunkType {
/// Reasoning/thinking text.
Thinking,
/// Answer text.
Answer,
/// A tool call notification.
ToolCall,
/// A tool result notification.
ToolResult,
}
/// A single chunk from the streaming response in arrival order.
#[derive(Debug, Clone)]
pub struct StreamChunk {
/// What kind of content this chunk carries.
pub chunk_type: StreamChunkType,
/// The chunk's text.
pub content: String,
}
/// Result of one completed streaming call through rig.
#[derive(Debug)]
pub struct StreamResponse {
/// Answer text of the response.
pub content: String,
/// Input (prompt) token count for this call.
pub input_tokens: i64,
/// Output (completion) token count for this call.
pub output_tokens: i64,
/// Accumulated reasoning/thinking text from the model.
pub reasoning_content: String,
/// Full tool calls with accumulated arguments (not just names)
pub tool_calls: Vec<StreamedToolCall>,
/// All chunks in arrival order — preserves think/answer/tool interleaving.
pub chunks: Vec<StreamChunk>,
}
/// Async callback: takes a string delta and broadcasts it to the WebSocket.
/// The returned Future must be awaited by the caller.
pub type StreamTextCb =
Arc<dyn Fn(&str) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
/// Async callback for reasoning deltas; same shape as [`StreamTextCb`].
/// The returned Future must be awaited by the caller.
pub type StreamReasoningCb =
Arc<dyn Fn(&str) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>> + Send + Sync>;
/// Async callback that receives a borrowed [`StreamedToolCall`].
/// The returned Future must be awaited by the caller.
pub type StreamToolCallCb = Arc<
dyn Fn(&StreamedToolCall) -> Pin<Box<dyn std::future::Future<Output = ()> + Send>>
+ Send
+ Sync,
>;
/// Run a streaming chat completion, retrying retryable failures up to 5 times.
///
/// Every attempt is delegated to `call_stream_once`, which bounds the whole stream
/// with a 120s timeout. Between attempts the task sleeps for the backoff duration
/// supplied by `RetryState`.
///
/// The callbacks are cloned for each attempt and fire while that attempt streams.
/// If an attempt fails mid-stream and is retried, deltas already delivered by the
/// failed attempt are delivered again by the next one; consumers should tolerate that.
///
/// # Errors
/// Returns the last error once retries are exhausted or the error is not retryable;
/// a failure is recorded in the AI metrics in that case.
pub async fn call_stream(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    temperature: f32,
    max_tokens: u32,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
    on_text_delta: StreamTextCb,
    on_reasoning_delta: StreamReasoningCb,
    on_tool_call: StreamToolCallCb,
) -> Result<StreamResponse> {
    let mut state = RetryState::new(5);
    loop {
        let result = call_stream_once(
            messages,
            model_name,
            config,
            temperature,
            max_tokens,
            tools,
            tool_choice,
            on_text_delta.clone(),
            on_reasoning_delta.clone(),
            on_tool_call.clone(),
        )
        .await;
        match result {
            Ok(response) => return Ok(response),
            Err(ref err) if state.should_retry() && is_retryable_error(err) => {
                let duration = state.backoff_duration();
                tracing::warn!(
                    attempt = state.attempt + 1,
                    // Read the limit from the retry state so the log cannot drift from
                    // the value passed to `RetryState::new`.
                    max_retries = state.max_retries,
                    backoff_ms = duration.as_millis() as u64,
                    model = %model_name,
                    error = %err,
                    "ai_stream_retry"
                );
                tokio::time::sleep(duration).await;
                state.next();
            }
            Err(err) => {
                ai_metrics().record_failure();
                return Err(err);
            }
        }
    }
}
/// Single attempt of a streaming completion, bounded by a 120s timeout over the whole stream.
///
/// The first message with role `"system"` becomes the preamble; all other non-system
/// messages are forwarded as chat history. Text, reasoning and completed tool calls are
/// reported through the callbacks as they arrive and are also recorded, in arrival order,
/// in [`StreamResponse::chunks`].
///
/// Tool calls that were only seen as deltas (never completed by a `ToolCall` item) are
/// flushed into `tool_calls` when the stream ends. Those flushed calls are *not* reported
/// through `on_tool_call`, are not recorded in `chunks`, and come out of a `HashMap`, so
/// their relative order is unspecified.
///
/// # Errors
/// * `AgentError::OpenAi` when the stream cannot be opened or yields an error item.
/// * `AgentError::Timeout` when the stream does not finish within 120 seconds.
async fn call_stream_once(
    messages: &[ChatRequestMessage],
    model_name: &str,
    config: &AiClientConfig,
    temperature: f32,
    max_tokens: u32,
    tools: Option<&[serde_json::Value]>,
    tool_choice: Option<&str>,
    on_text_delta: StreamTextCb,
    on_reasoning_delta: StreamReasoningCb,
    on_tool_call: StreamToolCallCb,
) -> Result<StreamResponse> {
    use rig::streaming::{StreamedAssistantContent, ToolCallDeltaContent};
    use std::collections::HashMap;

    let client = config.build_rig_client();
    let model = client.completion_model(model_name);
    let preamble = messages
        .iter()
        .find(|m| m.role == "system")
        .and_then(|m| m.content.as_deref())
        .unwrap_or("")
        .to_string();
    let non_system: Vec<RigMessage> = messages
        .iter()
        .filter(|m| m.role != "system")
        .map(to_rig_message)
        .collect();
    let tool_defs: Vec<ToolDefinition> = tools
        .map(|ts| ts.iter().filter_map(to_rig_tool_def).collect())
        .unwrap_or_default();
    let mut builder = model
        .completion_request("")
        .temperature(temperature as f64)
        .max_tokens(max_tokens as u64);
    if !preamble.is_empty() {
        builder = builder.preamble(preamble);
    }
    if !non_system.is_empty() {
        builder = builder.messages(non_system);
    }
    if !tool_defs.is_empty() {
        builder = builder.tools(tool_defs);
    }
    if let Some(tc) = tool_choice {
        match tc {
            "none" => {
                builder = builder.tool_choice(rig::completion::message::ToolChoice::None);
            }
            "auto" => {
                builder = builder.tool_choice(rig::completion::message::ToolChoice::Auto);
            }
            // Any other value is treated as the name of the one function to call.
            s => {
                builder = builder.tool_choice(rig::completion::message::ToolChoice::Specific {
                    function_names: vec![s.to_string()],
                });
            }
        }
    }
    let stream_fut = async {
        let mut stream = builder
            .stream()
            .await
            .map_err(|e| AgentError::OpenAi(e.to_string()))?;
        let mut content = String::new();
        let mut reasoning_content = String::new();
        let mut tool_calls: Vec<StreamedToolCall> = Vec::new();
        let mut chunks: Vec<StreamChunk> = Vec::new();
        // Some models (e.g. GLM) ignore tool_choice="none" and still emit tool_calls.
        // Filter them out so they don't cause spurious tool execution attempts.
        let skip_tool_calls = tool_choice == Some("none");
        // Tool calls seen so far only as deltas, keyed by rig's internal call id.
        let mut partial_tool_calls: HashMap<String, StreamedToolCall> = HashMap::new();
        let mut stream_finished = false;
        while let Some(item) = stream.next().await {
            match item {
                Ok(StreamedAssistantContent::Text(text)) => {
                    content.push_str(&text.text);
                    on_text_delta(&text.text).await;
                    chunks.push(StreamChunk {
                        chunk_type: StreamChunkType::Answer,
                        content: text.text,
                    });
                }
                Ok(StreamedAssistantContent::ToolCall {
                    tool_call,
                    internal_call_id,
                }) => {
                    if skip_tool_calls {
                        partial_tool_calls.remove(&internal_call_id);
                        continue;
                    }
                    // Arguments may arrive as a JSON string or as a structured value;
                    // normalise both to a JSON string.
                    let arguments = match &tool_call.function.arguments {
                        serde_json::Value::String(s) => s.clone(),
                        other => serde_json::to_string(other).unwrap_or_else(|_| "{}".to_string()),
                    };
                    let tc = StreamedToolCall {
                        id: tool_call.id.clone(),
                        name: tool_call.function.name.clone(),
                        arguments,
                    };
                    on_tool_call(&tc).await;
                    chunks.push(StreamChunk {
                        chunk_type: StreamChunkType::ToolCall,
                        content: serde_json::json!({
                            "id": tc.id,
                            "name": tc.name,
                            "arguments": tc.arguments,
                        })
                        .to_string(),
                    });
                    tool_calls.push(tc);
                    // The complete call supersedes whatever was accumulated from deltas.
                    partial_tool_calls.remove(&internal_call_id);
                }
                Ok(StreamedAssistantContent::ToolCallDelta {
                    id,
                    internal_call_id,
                    content: delta_content,
                }) => {
                    if skip_tool_calls {
                        continue;
                    }
                    match delta_content {
                        // A name delta (re)starts accumulation for this call id.
                        ToolCallDeltaContent::Name(name) => {
                            partial_tool_calls.insert(
                                internal_call_id.clone(),
                                StreamedToolCall {
                                    id: id.clone(),
                                    name,
                                    arguments: String::new(),
                                },
                            );
                        }
                        // Argument deltas for a call whose name has not been seen are dropped.
                        ToolCallDeltaContent::Delta(delta) => {
                            if let Some(tc) = partial_tool_calls.get_mut(&internal_call_id) {
                                tc.arguments.push_str(&delta);
                            }
                        }
                    }
                }
                Ok(StreamedAssistantContent::Reasoning(reasoning)) => {
                    for part in &reasoning.content {
                        if let rig::completion::message::ReasoningContent::Text { text, .. } = part
                        {
                            reasoning_content.push_str(text);
                            on_reasoning_delta(text).await;
                            chunks.push(StreamChunk {
                                chunk_type: StreamChunkType::Thinking,
                                content: text.clone(),
                            });
                        }
                    }
                }
                Ok(StreamedAssistantContent::ReasoningDelta { reasoning, .. }) => {
                    reasoning_content.push_str(&reasoning);
                    on_reasoning_delta(&reasoning).await;
                    chunks.push(StreamChunk {
                        chunk_type: StreamChunkType::Thinking,
                        // Last use of `reasoning`: move it instead of cloning.
                        content: reasoning,
                    });
                }
                Ok(StreamedAssistantContent::Final(response)) => {
                    stream_finished = true;
                    if skip_tool_calls {
                        partial_tool_calls.clear();
                    } else {
                        tool_calls.extend(partial_tool_calls.drain().map(|(_, tc)| tc));
                    }
                    if let Some(usage) = response.token_usage() {
                        let in_toks = usage.input_tokens as i64;
                        let out_toks = usage.output_tokens as i64;
                        ai_metrics().record_success(in_toks, out_toks, !tool_calls.is_empty());
                        return Ok(StreamResponse {
                            content,
                            reasoning_content,
                            input_tokens: in_toks,
                            output_tokens: out_toks,
                            tool_calls,
                            chunks,
                        });
                    }
                    // Usage not available from Final — keep reading; the zero-usage
                    // response is built after the loop.
                }
                Err(e) => return Err(AgentError::OpenAi(e.to_string())),
            }
        }
        // Flush remaining partial tool calls when the stream ended without a Final item
        // (a Final item has already flushed them).
        if !stream_finished && !skip_tool_calls {
            tool_calls.extend(partial_tool_calls.drain().map(|(_, tc)| tc));
        }
        ai_metrics().record_success(0, 0, !tool_calls.is_empty());
        Ok(StreamResponse {
            content,
            reasoning_content,
            input_tokens: 0,
            output_tokens: 0,
            tool_calls,
            chunks,
        })
    };
    // 120s timeout for the entire stream
    match tokio::time::timeout(std::time::Duration::from_secs(120), stream_fut).await {
        Ok(result) => result,
        Err(_) => Err(AgentError::Timeout {
            task_id: 0,
            seconds: 120,
        }),
    }
}

View File

@ -1,240 +0,0 @@
//! Internal message types for OpenAI-compatible chat completion API.
//!
//! Uses plain structs with `role: String` instead of an enum — easier to serialize,
//! and the downstream code only constructs specific variants anyway.
use serde::{Deserialize, Serialize};
/// A message in a chat completion request.
///
/// Optional fields that are `None` are omitted from the serialized JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatRequestMessage {
    /// One of "system", "user", "assistant", "tool", "developer", "function"
    pub role: String,
    /// Text content of the message, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Optional name attached to the message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Required for "tool" role messages
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_call_id: Option<String>,
    /// Tool calls for "assistant" role messages
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}
impl ChatRequestMessage {
    /// Builds a message carrying only a role and optional content; all other fields are `None`.
    fn bare(role: &str, content: Option<String>) -> Self {
        Self {
            role: role.into(),
            content,
            name: None,
            tool_call_id: None,
            tool_calls: None,
        }
    }

    /// Creates a `"system"` message with the given content.
    pub fn system(content: impl Into<String>) -> Self {
        Self::bare("system", Some(content.into()))
    }

    /// Creates a `"user"` message with the given content.
    pub fn user(content: impl Into<String>) -> Self {
        Self::bare("user", Some(content.into()))
    }

    /// Creates an `"assistant"` message with optional content and optional tool calls.
    pub fn assistant(content: Option<String>, tool_calls: Option<Vec<ToolCall>>) -> Self {
        let mut message = Self::bare("assistant", content);
        message.tool_calls = tool_calls;
        message
    }

    /// Creates a `"tool"` message answering the tool call identified by `tool_call_id`.
    pub fn tool(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
        let mut message = Self::bare("tool", Some(content.into()));
        message.tool_call_id = Some(tool_call_id.into());
        message
    }

    /// Returns the message with its `name` set.
    pub fn with_name(self, name: impl Into<String>) -> Self {
        Self {
            name: Some(name.into()),
            ..self
        }
    }

    /// Creates a `"developer"` message with the given content.
    pub fn developer(content: impl Into<String>) -> Self {
        Self::bare("developer", Some(content.into()))
    }

    /// Returns the message with its `tool_calls` replaced (used to record the AI's tool calls).
    pub fn with_tool_calls(self, tool_calls: Vec<ToolCall>) -> Self {
        Self {
            tool_calls: Some(tool_calls),
            ..self
        }
    }
}
/// A tool call within an assistant message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// Identifier of the tool call.
    pub id: String,
    /// Call type; serialized under the JSON key `type` (a Rust keyword, hence the rename).
    #[serde(rename = "type")]
    pub type_: String,
    /// The function being called and its arguments.
    pub function: ToolCallFunction,
}
/// Function details within a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// Name of the function to call.
    pub name: String,
    /// Function arguments, carried as a string.
    pub arguments: String,
}
/// Chat completion request body (serialized to JSON for the HTTP API).
///
/// Every optional field is omitted from the JSON when it is `None`.
#[derive(Debug, Clone, Serialize)]
pub struct ChatCompletionRequest {
    /// Model identifier.
    pub model: String,
    /// Conversation messages, in order.
    pub messages: Vec<ChatRequestMessage>,
    /// Sampling temperature.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// Upper bound on completion tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<u32>,
    /// Nucleus-sampling parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// Frequency penalty.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f64>,
    /// Presence penalty.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f64>,
    /// Whether to stream the response; set to `Some(true)` by `with_stream`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
    /// Reasoning effort for models that support it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
    /// Tool definitions as raw JSON values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<serde_json::Value>>,
    /// Tool choice as a raw JSON value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<serde_json::Value>,
}
impl ChatCompletionRequest {
    /// Returns the request with streaming switched on (`stream = Some(true)`).
    pub fn with_stream(self) -> Self {
        Self {
            stream: Some(true),
            ..self
        }
    }
}
/// Reasoning effort level for supported models.
///
/// Variants serialize as their lowercase name.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffort {
    /// Serialized as `"high"`.
    High,
}
// ── Response types (non-streaming) ──
/// Chat completion response (non-streaming). Deserialize-only from the API JSON.
#[derive(Debug, Clone, Deserialize)]
pub struct ChatCompletionResponse {
    /// Response identifier; `None` when absent from the JSON.
    #[serde(default)]
    pub id: Option<String>,
    /// Model name echoed by the API; `None` when absent.
    #[serde(default)]
    pub model: Option<String>,
    /// Returned choices (required in the JSON).
    pub choices: Vec<Choice>,
    /// Token usage; `None` when absent.
    #[serde(default)]
    pub usage: Option<Usage>,
}
/// One choice of a non-streaming chat completion response.
#[derive(Debug, Clone, Deserialize)]
pub struct Choice {
    /// Position of this choice in the response.
    pub index: u32,
    /// The message produced for this choice.
    pub message: ResponseMessage,
    /// Why generation stopped, if reported.
    pub finish_reason: Option<String>,
}
/// The message carried by a response [`Choice`].
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseMessage {
    /// Role of the message author, if reported.
    pub role: Option<String>,
    /// Text content, if any.
    pub content: Option<String>,
    /// Tool calls requested by the model; `None` when absent from the JSON.
    #[serde(default)]
    pub tool_calls: Option<Vec<ResponseToolCall>>,
}
/// A tool call returned inside a non-streaming response message.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseToolCall {
    /// Identifier of the tool call.
    pub id: String,
    /// Call type; read from the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// The function being called and its arguments.
    pub function: ResponseToolCallFunction,
}
/// Function name and arguments of a [`ResponseToolCall`].
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseToolCallFunction {
    /// Name of the function to call.
    pub name: String,
    /// Function arguments, carried as a string.
    pub arguments: String,
}
/// Token usage from the response.
///
/// Each field accepts either of two JSON key spellings via a serde alias.
#[derive(Debug, Clone, Deserialize)]
pub struct Usage {
    /// Prompt token count; read from `prompt_tokens` or `input_tokens`.
    #[serde(rename = "prompt_tokens", alias = "input_tokens")]
    pub prompt_tokens: u64,
    /// Completion token count; read from `completion_tokens` or `output_tokens`.
    #[serde(rename = "completion_tokens", alias = "output_tokens")]
    pub completion_tokens: u64,
}
// ── Streaming types ──
/// A chunk from a streaming chat completion (SSE `data:` lines).
#[derive(Debug, Clone, Deserialize)]
pub struct StreamChunk {
    /// Chunk identifier; `None` when absent from the JSON.
    #[serde(default)]
    pub id: Option<String>,
    /// Model name; `None` when absent.
    #[serde(default)]
    pub model: Option<String>,
    /// Incremental choices carried by this chunk (required in the JSON).
    pub choices: Vec<StreamChoice>,
}
/// One choice inside a streaming [`StreamChunk`].
#[derive(Debug, Clone, Deserialize)]
pub struct StreamChoice {
    /// Incremental content for this choice.
    pub delta: Delta,
    /// Why generation stopped, if reported in this chunk.
    pub finish_reason: Option<String>,
    /// Position of this choice in the response.
    pub index: u32,
}
/// Incremental message content of a streaming choice; every field defaults to `None`.
#[derive(Debug, Clone, Deserialize)]
pub struct Delta {
    /// Role of the message author, when present in this chunk.
    #[serde(default)]
    pub role: Option<String>,
    /// Text fragment, when present in this chunk.
    #[serde(default)]
    pub content: Option<String>,
    /// Tool-call fragments, when present in this chunk.
    #[serde(default)]
    pub tool_calls: Option<Vec<StreamToolCall>>,
}
/// A tool-call fragment inside a streaming [`Delta`].
#[derive(Debug, Clone, Deserialize)]
pub struct StreamToolCall {
    /// Index of the tool call this fragment belongs to (required in the JSON).
    pub index: u32,
    /// Tool call identifier; `None` when absent from this fragment.
    #[serde(default)]
    pub id: Option<String>,
    /// Call type, read from the JSON key `type`; `None` when absent.
    #[serde(rename = "type", default)]
    pub type_: Option<String>,
    /// Function name/argument fragment, if any.
    pub function: Option<StreamToolCallFunction>,
}
/// Function fragment of a streaming tool call; both fields default to `None`.
#[derive(Debug, Clone, Deserialize)]
pub struct StreamToolCallFunction {
    /// Function name, when present in this fragment.
    #[serde(default)]
    pub name: Option<String>,
    /// Piece of the arguments string, when present in this fragment.
    #[serde(default)]
    pub arguments: Option<String>,
}

View File

@ -1,39 +0,0 @@
use crate::AgentError;
use models::Expr;
use models::rooms::room_message::{
Column as RmCol, Entity as RoomMessage, Model as RoomMessageModel,
};
use sea_orm::*;
impl super::CompactService {
    /// Loads up to 10000 messages of `room_id`, ordered by ascending `seq`, but only
    /// when `requester_id` is tied to the room.
    ///
    /// The requester qualifies when a `room_user_state` row or a `room_access` row exists
    /// for the (room, user) pair. Neither subquery references the message row, so the
    /// check gates the whole result: an unauthorised requester gets an empty list,
    /// not an error.
    ///
    /// # Errors
    /// Database failures are returned as `AgentError::Internal`.
    pub async fn fetch_room_messages_secure(
        &self,
        room_id: uuid::Uuid,
        requester_id: uuid::Uuid,
    ) -> Result<Vec<RoomMessageModel>, AgentError> {
        use models::rooms::room_access::Column as AccessCol;
        use models::rooms::room_user_state::Column as StateCol;
        use models::rooms::{RoomAccess, RoomUserState};

        let has_user_state = RoomUserState::find()
            .filter(StateCol::Room.eq(room_id))
            .filter(StateCol::User.eq(requester_id))
            .into_query();
        let has_access_row = RoomAccess::find()
            .filter(AccessCol::Room.eq(room_id))
            .filter(AccessCol::User.eq(requester_id))
            .into_query();
        let requester_allowed = Condition::any()
            .add(Expr::exists(has_user_state))
            .add(Expr::exists(has_access_row));

        let rows = RoomMessage::find()
            .filter(RmCol::Room.eq(room_id))
            .filter(requester_allowed)
            .order_by_asc(RmCol::Seq)
            .limit(10000)
            .all(&self.db)
            .await;
        rows.map_err(|e| AgentError::Internal(e.to_string()))
    }
}

View File

@ -1,45 +0,0 @@
use super::types::{CompactSummary, MessageSummary};
/// Renders room messages as text, one `[send_at] sender: content` line per message.
///
/// `sender_mapper` supplies the display name for each message. Lines are joined
/// with `\n`; an empty slice yields an empty string.
pub fn messages_to_text<F>(
    messages: &[models::rooms::room_message::Model],
    sender_mapper: F,
) -> String
where
    F: Fn(&models::rooms::room_message::Model) -> String,
{
    let mut lines = Vec::with_capacity(messages.len());
    for message in messages {
        let sender = sender_mapper(message);
        lines.push(format!(
            "[{}] {}: {}",
            message.send_at, sender, message.content
        ));
    }
    lines.join("\n")
}
/// Renders retained message summaries as `[send_at] sender_name: content` lines
/// joined with `\n`.
pub fn retained_as_text(retained: &[MessageSummary]) -> String {
    let mut lines = Vec::with_capacity(retained.len());
    for entry in retained {
        lines.push(format!(
            "[{}] {}: {}",
            entry.send_at, entry.sender_name, entry.content
        ));
    }
    lines.join("\n")
}
/// Formats a compaction result as Markdown-style text.
///
/// Without a summary only the retained messages are listed under a
/// "Recent conversation" heading; otherwise the summary comes first, followed by
/// the retained messages under "Most recent N messages".
pub fn summary_content(summary: &CompactSummary) -> String {
    let retained_count = summary.retained.len();
    let retained_text = retained_as_text(&summary.retained);
    if summary.summary.is_empty() {
        return format!(
            "## Recent conversation ({} messages)\n\n{}",
            retained_count, retained_text
        );
    }
    format!(
        "## Earlier conversation ({} messages summarised)\n{}\n\n\
         ## Most recent {} messages\n\n{}",
        summary.messages_compressed, summary.summary, retained_count, retained_text
    )
}

View File

@ -1,56 +0,0 @@
//! Context compaction for AI sessions and room message history.
pub mod auth_fetch;
pub mod helpers;
pub mod room_compactor;
pub mod summarizer;
pub mod types;
use sea_orm::DatabaseConnection;
pub use types::{
CompactConfig, CompactLevel, CompactSummary, MessageSummary, RoomCompactContext,
RoomCompactRecord, ThresholdResult,
};
/// Service that compacts room/session message history using an AI model.
#[derive(Clone)]
pub struct CompactService {
    /// Database connection used for message, user and summary queries.
    db: DatabaseConnection,
    /// Client configuration passed to the AI call that produces summaries.
    ai_client_config: crate::client::AiClientConfig,
    /// Name of the model used for summarisation and token counting.
    model: String,
    /// Optional context-window size of `model`; `None` when unknown (zero is normalised to `None`).
    model_context_limit: Option<usize>,
}
impl CompactService {
    /// Creates a service for `model` with no known context limit.
    pub fn new(
        db: DatabaseConnection,
        ai_client_config: crate::client::AiClientConfig,
        model: String,
    ) -> Self {
        let model_context_limit = None;
        Self {
            db,
            ai_client_config,
            model,
            model_context_limit,
        }
    }

    /// Returns a copy of this service that targets a different model, keeping the
    /// database handle, client configuration and context limit.
    pub fn for_model(&self, model: impl Into<String>) -> Self {
        let Self {
            db,
            ai_client_config,
            model_context_limit,
            ..
        } = self;
        Self {
            db: db.clone(),
            ai_client_config: ai_client_config.clone(),
            model: model.into(),
            model_context_limit: *model_context_limit,
        }
    }

    /// Sets the model context limit; `None` and `Some(0)` both mean "unknown".
    pub fn with_model_context_limit(mut self, model_context_limit: Option<usize>) -> Self {
        self.model_context_limit = match model_context_limit {
            Some(limit) if limit > 0 => Some(limit),
            _ => None,
        };
        self
    }

    /// Returns a copy configured from a model table entry: its name becomes the model
    /// and its `context_length` (negative values clamped to zero) the context limit.
    pub fn for_model_entry(&self, model: &models::agents::model::Model) -> Self {
        let service = self.for_model(model.name.clone());
        let context_length = model.context_length.max(0) as usize;
        service.with_model_context_limit(Some(context_length))
    }
}

View File

@ -1,422 +0,0 @@
use models::rooms::room_message::{
Column as RmCol, Entity as RoomMessage, Model as RoomMessageModel,
};
use sea_orm::ColumnTrait;
use sea_orm::{ConnectionTrait, EntityTrait, QueryFilter, QueryOrder, QuerySelect};
use crate::compact::types::{CompactConfig, CompactLevel, RoomCompactContext, RoomCompactRecord};
use crate::tokent::resolve_usage;
use crate::{AgentError, CompactSummary, MessageSummary};
impl super::CompactService {
/// Fetches the most recent compaction record for a room.
///
/// "Most recent" means highest `to_seq`, ties broken by newest `created_at`.
/// Returns `Ok(None)` when the room has no record. Entries of
/// `source_message_ids` that are not valid UUID strings are silently skipped, and a
/// non-array value yields an empty list.
///
/// # Errors
/// Query and column-decoding failures are returned as `AgentError::Internal`.
pub async fn latest_room_compact_record(
    &self,
    room_id: uuid::Uuid,
) -> Result<Option<RoomCompactRecord>, AgentError> {
    /// Wraps any displayable error as an internal agent error.
    fn internal<E: std::fmt::Display>(err: E) -> AgentError {
        AgentError::Internal(err.to_string())
    }

    let query = sea_orm::Statement::from_sql_and_values(
        sea_orm::DbBackend::Postgres,
        "SELECT id, room, from_seq, to_seq, summary, message_count, source_message_ids, created_at \
         FROM room_compact_summary WHERE room = $1 ORDER BY to_seq DESC, created_at DESC LIMIT 1",
        vec![room_id.into()],
    );
    let row = match self.db.query_one_raw(query).await.map_err(internal)? {
        Some(row) => row,
        None => return Ok(None),
    };

    let source_json: serde_json::Value = row
        .try_get("", "source_message_ids")
        .map_err(internal)?;
    let source_message_ids = match source_json.as_array() {
        Some(ids) => ids
            .iter()
            .filter_map(|value| uuid::Uuid::parse_str(value.as_str()?).ok())
            .collect::<Vec<_>>(),
        None => Vec::new(),
    };

    let record = RoomCompactRecord {
        id: row.try_get("", "id").map_err(internal)?,
        room_id: row.try_get("", "room").map_err(internal)?,
        from_seq: row.try_get("", "from_seq").map_err(internal)?,
        to_seq: row.try_get("", "to_seq").map_err(internal)?,
        summary: row.try_get("", "summary").map_err(internal)?,
        message_count: row.try_get("", "message_count").map_err(internal)?,
        source_message_ids,
        created_at: row.try_get("", "created_at").map_err(internal)?,
    };
    Ok(Some(record))
}
/// Inserts a new row into `room_compact_summary` and returns it as a record.
///
/// A fresh v4 UUID is generated for the row; `created_at` and `updated_at` are both
/// set to the current UTC time. `message_count` is the number of source message ids,
/// and the ids themselves are stored as a JSON array of strings.
///
/// # Errors
/// A failed insert is returned as `AgentError::Internal`.
async fn insert_room_compact_record(
    &self,
    room_id: uuid::Uuid,
    from_seq: i64,
    to_seq: i64,
    summary: &str,
    source_message_ids: &[uuid::Uuid],
) -> Result<RoomCompactRecord, AgentError> {
    let id = uuid::Uuid::new_v4();
    let now = chrono::Utc::now();
    let message_count = source_message_ids.len() as i32;
    let id_strings: Vec<serde_json::Value> = source_message_ids
        .iter()
        .map(|message_id| serde_json::Value::String(message_id.to_string()))
        .collect();
    let source_json = serde_json::Value::Array(id_strings);

    let insert = sea_orm::Statement::from_sql_and_values(
        sea_orm::DbBackend::Postgres,
        "INSERT INTO room_compact_summary \
         (id, room, from_seq, to_seq, summary, message_count, source_message_ids, created_at, updated_at) \
         VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)",
        vec![
            id.into(),
            room_id.into(),
            from_seq.into(),
            to_seq.into(),
            summary.to_string().into(),
            message_count.into(),
            source_json.into(),
            now.into(),
            now.into(),
        ],
    );
    if let Err(err) = self.db.execute_raw(insert).await {
        return Err(AgentError::Internal(err.to_string()));
    }

    Ok(RoomCompactRecord {
        id,
        room_id,
        from_seq,
        to_seq,
        summary: summary.to_string(),
        message_count,
        source_message_ids: source_message_ids.to_vec(),
        created_at: now,
    })
}
/// Filters, orders and de-duplicates messages before compaction.
///
/// 1. Keeps only non-revoked text messages whose content is not blank.
/// 2. Sorts by `(seq, send_at)`.
/// 3. Drops later messages that repeat an earlier one from the same sender, comparing
///    content case-insensitively with whitespace runs collapsed to single spaces.
fn clean_dedupe_sort_messages(mut messages: Vec<RoomMessageModel>) -> Vec<RoomMessageModel> {
    messages.retain(|m| {
        let is_text = matches!(m.content_type, models::rooms::MessageContentType::Text);
        let is_blank = m.content.trim().is_empty();
        m.revoked.is_none() && !is_blank && is_text
    });
    messages.sort_by_key(|m| (m.seq, m.send_at));

    let mut seen = std::collections::HashSet::new();
    // `retain` visits elements in order, so the first occurrence of each key survives.
    messages.retain(|m| {
        let words: Vec<&str> = m.content.split_whitespace().collect();
        let normalized = words.join(" ").to_lowercase();
        seen.insert(format!(
            "{}:{:?}:{}",
            m.sender_type, m.sender_id, normalized
        ))
    });
    messages
}
/// Decides how many trailing messages stay verbatim after compaction.
///
/// With `auto_level` on, the level is chosen from the estimated token count and the
/// configured threshold; otherwise the configured default level is used.
fn resolve_retain_count(config: CompactConfig, estimated_tokens: usize) -> usize {
    if !config.auto_level {
        return config.default_level.retain_count();
    }
    CompactLevel::auto_select(estimated_tokens, config.token_threshold).retain_count()
}
/// Builds the compacted context for a room, summarising incrementally when needed.
///
/// Messages newer than the latest stored compaction record are fetched (access-checked
/// for `requester_id`), cleaned and de-duplicated. If the estimated token count of
/// previous summary + new messages reaches `config.token_threshold` and there are more
/// messages than the retain count, the older part is summarised, a new compaction record
/// is stored, and `compacted` is `true`. Otherwise the previous summary (if any) is
/// returned with at most the last 50 messages, and `compacted` is `false`.
///
/// When `user_names` is `None`, sender names are looked up in the database.
///
/// NOTE(review): the cutoff filter is applied in memory after
/// `fetch_room_messages_secure`, which caps its result at 10000 rows in ascending
/// order — confirm behaviour for rooms beyond that size.
///
/// # Errors
/// Propagates database and summarisation errors as `AgentError`.
pub async fn prepare_room_compact_context(
    &self,
    room_id: uuid::Uuid,
    requester_id: uuid::Uuid,
    user_names: Option<std::collections::HashMap<uuid::Uuid, String>>,
    config: CompactConfig,
) -> Result<RoomCompactContext, AgentError> {
    let latest = self.latest_room_compact_record(room_id).await?;
    let cutoff_seq = latest.as_ref().map(|record| record.to_seq);
    let previous_summary = latest.as_ref().map(|record| record.summary.as_str());

    let fetched = self
        .fetch_room_messages_secure(room_id, requester_id)
        .await?;
    let after_cutoff: Vec<_> = fetched
        .into_iter()
        .filter(|m| match cutoff_seq {
            Some(seq) => m.seq > seq,
            None => true,
        })
        .collect();
    let messages = Self::clean_dedupe_sort_messages(after_cutoff);

    let distinct_senders: std::collections::HashSet<uuid::Uuid> =
        messages.iter().filter_map(|m| m.sender_id).collect();
    let user_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
    let user_name_map = match user_names {
        Some(map) => map,
        None => self.get_user_name_map(&user_ids).await?,
    };

    // Prefer the username; fall back to the sender type when the id is missing or unknown.
    let sender_mapper = |m: &RoomMessageModel| {
        m.sender_id
            .and_then(|user_id| user_name_map.get(&user_id).cloned())
            .unwrap_or_else(|| m.sender_type.to_string())
    };
    let incremental_text = crate::compact::helpers::messages_to_text(&messages, sender_mapper);
    let estimate_input = match previous_summary.filter(|summary| !summary.is_empty()) {
        Some(summary) => format!("{}\n{}", summary, incremental_text),
        None => incremental_text,
    };
    // Fall back to a rough 4-bytes-per-token estimate when the tokenizer fails.
    let estimated_tokens = crate::tokent::count_message_text(&estimate_input, &self.model)
        .unwrap_or_else(|_| estimate_input.len() / 4);
    let retain_count = Self::resolve_retain_count(config, estimated_tokens);

    let needs_compaction =
        estimated_tokens >= config.token_threshold && messages.len() > retain_count;
    if !needs_compaction {
        let tail_start = messages.len().saturating_sub(50);
        let retained = messages[tail_start..]
            .iter()
            .map(|m| Self::message_to_summary(m, &user_name_map))
            .collect();
        return Ok(RoomCompactContext {
            room_id,
            cutoff_seq,
            summary: latest.map(|record| record.summary),
            retained,
            estimated_tokens,
            compacted: false,
        });
    }

    let split_index = messages.len().saturating_sub(retain_count);
    let (to_summarize, retained_messages) = messages.split_at(split_index);
    let from_seq = to_summarize
        .first()
        .map_or(cutoff_seq.unwrap_or(0) + 1, |m| m.seq);
    let to_seq = to_summarize.last().map_or(from_seq, |m| m.seq);
    let source_ids: Vec<uuid::Uuid> = to_summarize.iter().map(|m| m.id).collect();

    let (summary, _usage) = self
        .summarize_room_increment(previous_summary, to_summarize, config.max_summary_tokens)
        .await?;
    let record = self
        .insert_room_compact_record(room_id, from_seq, to_seq, &summary, &source_ids)
        .await?;

    let retained = retained_messages
        .iter()
        .map(|m| Self::message_to_summary(m, &user_name_map))
        .collect();
    Ok(RoomCompactContext {
        room_id,
        cutoff_seq: Some(record.to_seq),
        summary: Some(record.summary),
        retained,
        estimated_tokens,
        compacted: true,
    })
}
pub async fn compact_room(
&self,
room_id: uuid::Uuid,
level: CompactLevel,
user_names: Option<std::collections::HashMap<uuid::Uuid, String>>,
requester_id: uuid::Uuid,
context_window_tokens: i32,
compaction_max_summary_ratio: f32,
) -> Result<CompactSummary, AgentError> {
let messages = self
.fetch_room_messages_secure(room_id, requester_id)
.await?;
if messages.is_empty() {
let room_exists = models::rooms::room::Entity::find_by_id(room_id)
.one(&self.db)
.await
.map_err(|e| AgentError::Internal(e.to_string()))?
.is_some();
if room_exists {
return Err(AgentError::Internal("Access denied or room empty".into()));
} else {
return Err(AgentError::Internal("Room not found".into()));
}
}
let user_ids: Vec<uuid::Uuid> = messages
.iter()
.filter_map(|m| m.sender_id)
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();
let user_name_map = match user_names {
Some(map) => map,
None => self.get_user_name_map(&user_ids).await?,
};
if messages.len() <= level.retain_count() {
let retained: Vec<MessageSummary> = messages
.iter()
.map(|m| Self::message_to_summary(m, &user_name_map))
.collect();
return Ok(CompactSummary {
session_id: uuid::Uuid::new_v4(),
room_id,
retained,
summary: String::new(),
compacted_at: chrono::Utc::now(),
messages_compressed: 0,
usage: None,
});
}
let retain_count = level.retain_count();
let split_index = messages.len().saturating_sub(retain_count);
let (to_summarize, retained_messages) = messages.split_at(split_index);
let retained: Vec<MessageSummary> = retained_messages
.iter()
.map(|m| Self::message_to_summary(m, &user_name_map))
.collect();
let max_summary_tokens = CompactConfig::summary_token_budget(
context_window_tokens.max(0) as usize,
compaction_max_summary_ratio,
);
let (summary, remote_usage) = self
.summarize_messages(to_summarize, max_summary_tokens)
.await?;
let summarized_text = to_summarize
.iter()
.map(|m| m.content.as_str())
.collect::<Vec<_>>()
.join("\n");
let usage = resolve_usage(remote_usage, &self.model, &summarized_text, &summary);
Ok(CompactSummary {
session_id: uuid::Uuid::new_v4(),
room_id,
retained,
summary,
compacted_at: chrono::Utc::now(),
messages_compressed: to_summarize.len(),
usage: Some(usage),
})
}
/// Compacts the message history stored under `session_id`.
///
/// Messages are read from the room-message table with `session_id` used as the room
/// key (up to 10000 rows, ascending `seq`). The last `level.retain_count()` messages are
/// kept verbatim and the rest summarised; the returned `room_id` is the nil UUID.
///
/// NOTE(review): unlike `compact_room`, this query applies no requester access
/// filter — callers presumably authorise beforehand; confirm.
///
/// # Errors
/// * `AgentError::Internal("session has no messages")` when nothing is found.
/// * Database and summarisation failures.
pub async fn compact_session(
    &self,
    session_id: uuid::Uuid,
    level: CompactLevel,
    user_names: Option<std::collections::HashMap<uuid::Uuid, String>>,
    context_window_tokens: i32,
    compaction_max_summary_ratio: f32,
) -> Result<CompactSummary, AgentError> {
    let query = RoomMessage::find()
        .filter(RmCol::Room.eq(session_id))
        .order_by_asc(RmCol::Seq)
        .limit(10000);
    let messages: Vec<RoomMessageModel> = query
        .all(&self.db)
        .await
        .map_err(|e| AgentError::Internal(e.to_string()))?;
    if messages.is_empty() {
        return Err(AgentError::Internal("session has no messages".into()));
    }

    let distinct_senders: std::collections::HashSet<uuid::Uuid> =
        messages.iter().filter_map(|m| m.sender_id).collect();
    let user_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
    let user_name_map = match user_names {
        Some(map) => map,
        None => self.get_user_name_map(&user_ids).await?,
    };
    let to_summaries = |batch: &[RoomMessageModel]| -> Vec<MessageSummary> {
        batch
            .iter()
            .map(|m| Self::message_to_summary(m, &user_name_map))
            .collect()
    };

    let retain_count = level.retain_count();
    if messages.len() <= retain_count {
        return Ok(CompactSummary {
            session_id,
            room_id: uuid::Uuid::nil(),
            retained: to_summaries(&messages),
            summary: String::new(),
            compacted_at: chrono::Utc::now(),
            messages_compressed: 0,
            usage: None,
        });
    }

    let split_index = messages.len().saturating_sub(retain_count);
    let (to_summarize, retained_messages) = messages.split_at(split_index);
    let retained = to_summaries(retained_messages);

    let max_summary_tokens = CompactConfig::summary_token_budget(
        context_window_tokens.max(0) as usize,
        compaction_max_summary_ratio,
    );
    let (summary, remote_usage) = self
        .summarize_messages(to_summarize, max_summary_tokens)
        .await?;

    let contents: Vec<&str> = to_summarize.iter().map(|m| m.content.as_str()).collect();
    let summarized_text = contents.join("\n");
    let usage = resolve_usage(remote_usage, &self.model, &summarized_text, &summary);

    Ok(CompactSummary {
        session_id,
        room_id: uuid::Uuid::nil(),
        retained,
        summary,
        compacted_at: chrono::Utc::now(),
        messages_compressed: to_summarize.len(),
        usage: Some(usage),
    })
}
}

View File

@ -1,513 +0,0 @@
use models::rooms::room_message::Model as RoomMessageModel;
use models::users::user::{Column as UserCol, Entity as User};
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
use crate::AgentError;
use crate::client::call_with_params;
use crate::client::types::ChatRequestMessage;
use crate::compact::types::{CompactConfig, MessageSummary};
use crate::tokent::{TokenUsage, count_message_text};
// NOTE(review): the code that consumes these constants is outside this view; the
// descriptions below are inferred from the names — confirm against their use sites.

/// Presumably the context-window size (in tokens) assumed when a model's limit is unknown.
const DEFAULT_MODEL_CONTEXT_LIMIT: usize = 128_000;
/// Numerator of what looks like the usable-input fraction of the context window (85/100).
const MODEL_INPUT_RATIO_NUMERATOR: usize = 85;
/// Denominator paired with `MODEL_INPUT_RATIO_NUMERATOR`.
const MODEL_INPUT_RATIO_DENOMINATOR: usize = 100;
/// Presumably the smallest token budget allowed for one summarisation round.
const MIN_ROUND_SUMMARY_TOKENS: usize = 64;
/// Which flavour of summary is being produced; passed through to prompt building.
#[derive(Clone, Copy)]
enum SummaryKind {
    /// Summary of a plain message list (used by `summarize_messages`).
    Conversation,
    /// Incremental room summary that may extend a previous one
    /// (used by `summarize_room_increment`).
    RoomIncrement,
}
impl super::CompactService {
/// Summarises new room messages, optionally folding them into a previous summary.
///
/// Each message is rendered as `[send_at] sender: content`, where the sender is the
/// username looked up in the database or, failing that, the sender type. Returns the
/// summary text and the accumulated token usage, if any was reported.
///
/// # Errors
/// Propagates user-lookup and summarisation errors.
pub async fn summarize_room_increment(
    &self,
    previous_summary: Option<&str>,
    messages: &[RoomMessageModel],
    max_summary_tokens: usize,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    let distinct_senders: std::collections::HashSet<uuid::Uuid> =
        messages.iter().filter_map(|m| m.sender_id).collect();
    let user_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
    let user_name_map = self.get_user_name_map(&user_ids).await?;

    let mut blocks = Vec::with_capacity(messages.len());
    for message in messages {
        let sender = message
            .sender_id
            .and_then(|user_id| user_name_map.get(&user_id).cloned())
            .unwrap_or_else(|| message.sender_type.to_string());
        blocks.push(format!(
            "[{}] {}: {}",
            message.send_at, sender, message.content
        ));
    }

    self.summarize_blocks_with_optional_previous(
        blocks,
        previous_summary,
        max_summary_tokens,
        SummaryKind::RoomIncrement,
    )
    .await
}
/// Summarises a list of messages from scratch (no previous summary).
///
/// Each message is rendered as `[send_at] sender: content`, where the sender is the
/// username looked up in the database or, failing that, the sender type. Returns the
/// summary text and the accumulated token usage, if any was reported.
///
/// # Errors
/// Propagates user-lookup and summarisation errors.
pub async fn summarize_messages(
    &self,
    messages: &[RoomMessageModel],
    max_summary_tokens: usize,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    let distinct_senders: std::collections::HashSet<uuid::Uuid> =
        messages.iter().filter_map(|m| m.sender_id).collect();
    let user_ids: Vec<uuid::Uuid> = distinct_senders.into_iter().collect();
    let user_name_map = self.get_user_name_map(&user_ids).await?;

    let mut blocks = Vec::with_capacity(messages.len());
    for message in messages {
        let sender = message
            .sender_id
            .and_then(|user_id| user_name_map.get(&user_id).cloned())
            .unwrap_or_else(|| message.sender_type.to_string());
        blocks.push(format!(
            "[{}] {}: {}",
            message.send_at, sender, message.content
        ));
    }

    self.summarize_blocks_with_optional_previous(
        blocks,
        None,
        max_summary_tokens,
        SummaryKind::Conversation,
    )
    .await
}
/// Converts a stored room message into a `MessageSummary`.
///
/// The sender name is the mapped username when the message has a sender id present in
/// `user_name_map`; otherwise it is the sender type rendered as text.
/// `tool_call_id` is always `None`.
pub fn message_to_summary(
    m: &RoomMessageModel,
    user_name_map: &std::collections::HashMap<uuid::Uuid, String>,
) -> MessageSummary {
    let sender_name = m
        .sender_id
        .and_then(|user_id| user_name_map.get(&user_id).cloned())
        .unwrap_or_else(|| m.sender_type.to_string());
    MessageSummary {
        id: m.id,
        sender_type: m.sender_type.clone(),
        sender_id: m.sender_id,
        sender_name,
        content: m.content.clone(),
        content_type: m.content_type.clone(),
        tool_call_id: None,
        send_at: m.send_at,
    }
}
/// Looks up usernames for the given user ids.
///
/// Returns a map from user id to username containing only the ids found. An empty
/// `user_ids` slice returns an empty map without touching the database.
///
/// # Errors
/// A failed query is returned as `AgentError::Internal`.
pub async fn get_user_name_map(
    &self,
    user_ids: &[uuid::Uuid],
) -> Result<std::collections::HashMap<uuid::Uuid, String>, AgentError> {
    if user_ids.is_empty() {
        return Ok(std::collections::HashMap::new());
    }
    let users = User::find()
        .filter(UserCol::Uid.is_in(user_ids.to_vec()))
        .all(&self.db)
        .await
        .map_err(|e| AgentError::Internal(e.to_string()))?;
    Ok(users
        .into_iter()
        .map(|user| (user.uid, user.username))
        .collect())
}
/// Summarises text blocks, optionally merging the result with a previous summary.
///
/// The blocks are split into chunks that fit the model input budget and each chunk is
/// summarised on its own. A non-blank previous summary is placed in front of those
/// partial summaries, and the list is then merged down to one summary. With no
/// previous summary and exactly one partial summary, that summary is returned as is;
/// with nothing to summarise, an empty string and no usage are returned.
///
/// Usage is `Some` only when at least one model call reported usage.
async fn summarize_blocks_with_optional_previous(
    &self,
    blocks: Vec<String>,
    previous_summary: Option<&str>,
    max_summary_tokens: usize,
    kind: SummaryKind,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    let final_budget = Self::final_summary_budget(max_summary_tokens);
    let input_budget = self.safe_model_input_budget();
    let round_budget = Self::round_summary_budget(final_budget, input_budget);
    let mut total_usage = TokenUsage::default();
    let mut has_usage = false;

    let mut partial_summaries = Vec::new();
    for chunk in self.split_blocks_to_fit(blocks, input_budget, round_budget, kind, false)? {
        let prompt = self.build_prompt(kind, false, &chunk, round_budget);
        let (summary, usage) = self
            .invoke_summary_prompt(&prompt, round_budget, Self::temperature_for(kind))
            .await?;
        Self::accumulate_usage(&mut total_usage, &mut has_usage, usage);
        partial_summaries.push(summary);
    }

    let carried_over = previous_summary
        .map(str::trim)
        .filter(|summary| !summary.is_empty());
    if let Some(previous) = carried_over {
        partial_summaries.insert(0, previous.to_string());
    }
    if partial_summaries.is_empty() {
        return Ok((String::new(), None));
    }

    let final_summary = if partial_summaries.len() == 1 && previous_summary.is_none() {
        partial_summaries.remove(0)
    } else {
        self.merge_summary_rounds(
            partial_summaries,
            final_budget,
            round_budget,
            kind,
            &mut total_usage,
            &mut has_usage,
        )
        .await?
    };
    let usage = if has_usage { Some(total_usage) } else { None };
    Ok((final_summary, usage))
}
/// Repeatedly merges summaries pairwise until a single one remains.
///
/// Each round walks the list two entries at a time: a trailing unpaired entry
/// is carried over unchanged, every pair is fitted to the input budget and
/// each fitted body is sent to the model as a merge prompt. Rounds working on
/// at most two summaries use `final_budget`; earlier rounds use `round_budget`.
/// Token usage reported by the model is added to `total_usage`.
///
/// NOTE(review): a pair that does not fit the input budget is split into
/// several bodies, so a round is not guaranteed to shrink the list; termination
/// presumably relies on the model honouring the output budget — confirm.
///
/// # Errors
/// Propagates fitting and model errors, and returns `AgentError::Internal`
/// when called with no summaries at all.
async fn merge_summary_rounds(
    &self,
    mut summaries: Vec<String>,
    final_budget: usize,
    round_budget: usize,
    kind: SummaryKind,
    total_usage: &mut TokenUsage,
    has_usage: &mut bool,
) -> Result<String, AgentError> {
    let input_budget = self.safe_model_input_budget();
    while summaries.len() > 1 {
        let current_budget = match summaries.len() {
            0..=2 => final_budget,
            _ => round_budget,
        };
        let mut merged = Vec::new();
        for group in summaries.chunks(2) {
            // Odd one out at the end of the round: carry it to the next round as-is.
            if let [leftover] = group {
                merged.push(leftover.clone());
                continue;
            }
            let bodies = self.split_blocks_to_fit(
                group.to_vec(),
                input_budget,
                current_budget,
                kind,
                true,
            )?;
            for body in bodies {
                let prompt = self.build_prompt(kind, true, &body, current_budget);
                let (summary, usage) = self
                    .invoke_summary_prompt(&prompt, current_budget, Self::temperature_for(kind))
                    .await?;
                Self::accumulate_usage(total_usage, has_usage, usage);
                merged.push(summary);
            }
        }
        summaries = merged;
    }
    match summaries.pop() {
        Some(summary) => Ok(summary),
        None => Err(AgentError::Internal(
            "summary merge produced no output".into(),
        )),
    }
}
/// Sends `prompt` to the model as a single user message.
///
/// `max_summary_tokens` is forwarded as the output limit (narrowed to `u32`)
/// and `temperature` as the sampling temperature. Returns the response text
/// and the usage derived from the response's input/output token counts.
///
/// # Errors
/// Returns `AgentError::OpenAi` when the model call fails.
async fn invoke_summary_prompt(
    &self,
    prompt: &str,
    max_summary_tokens: usize,
    temperature: f32,
) -> Result<(String, Option<TokenUsage>), AgentError> {
    let messages = [ChatRequestMessage::user(prompt.to_string())];
    let output_limit = max_summary_tokens as u32;
    let reply = call_with_params(
        &messages,
        &self.model,
        &self.ai_client_config,
        temperature,
        output_limit,
        None,
        None,
        None,
    )
    .await
    .map_err(|err| AgentError::OpenAi(err.to_string()))?;
    let prompt_tokens = reply.input_tokens as u32;
    let completion_tokens = reply.output_tokens as u32;
    let usage = TokenUsage::from_remote(prompt_tokens, completion_tokens);
    Ok((reply.content, usage))
}
/// Partitions `blocks` into joined bodies whose prompts fit `input_budget`.
///
/// Thin entry point around `collect_fitting_chunks`: it owns the output
/// vector and hands it back once the recursive collection has succeeded.
///
/// # Errors
/// Propagates any error raised while collecting the chunks.
fn split_blocks_to_fit(
    &self,
    blocks: Vec<String>,
    input_budget: usize,
    max_summary_tokens: usize,
    kind: SummaryKind,
    is_merge: bool,
) -> Result<Vec<String>, AgentError> {
    let mut fitted = Vec::new();
    self.collect_fitting_chunks(
        blocks,
        input_budget,
        max_summary_tokens,
        kind,
        is_merge,
        &mut fitted,
    )
    .map(|()| fitted)
}
/// Recursively appends to `chunks` bodies whose full prompt fits `input_budget`.
///
/// The blocks are joined and wrapped in the prompt; if the estimated token
/// count fits, the joined body is emitted. Otherwise the block list is halved
/// and each half handled recursively; a single oversized block is split in
/// the middle of its text instead.
///
/// # Errors
/// Returns `AgentError::Internal` when an empty block list does not fit, and
/// propagates the error from `split_text_in_half` when a block cannot be
/// split any further.
fn collect_fitting_chunks(
    &self,
    blocks: Vec<String>,
    input_budget: usize,
    max_summary_tokens: usize,
    kind: SummaryKind,
    is_merge: bool,
    chunks: &mut Vec<String>,
) -> Result<(), AgentError> {
    let body = Self::join_blocks(&blocks, is_merge);
    let prompt_tokens =
        self.estimate_tokens(&self.build_prompt(kind, is_merge, &body, max_summary_tokens));
    if prompt_tokens <= input_budget {
        chunks.push(body);
        return Ok(());
    }
    let (first_half, second_half) = if blocks.len() > 1 {
        let (head, tail) = blocks.split_at(blocks.len() / 2);
        (head.to_vec(), tail.to_vec())
    } else {
        let only = blocks
            .into_iter()
            .next()
            .ok_or_else(|| AgentError::Internal("cannot split empty summary block".into()))?;
        let (left, right) = Self::split_text_in_half(&only)?;
        (vec![left], vec![right])
    };
    // Order matters: the first half must be emitted before the second.
    for half in [first_half, second_half] {
        self.collect_fitting_chunks(
            half,
            input_budget,
            max_summary_tokens,
            kind,
            is_merge,
            chunks,
        )?;
    }
    Ok(())
}
/// Builds the model prompt for one summarisation step.
///
/// The template is chosen by `kind` (conversation vs. incremental room
/// summary) and `is_merge` (summarise raw material vs. merge partial
/// summaries). `max_summary_tokens` is interpolated into the instruction as
/// the stated output limit and `body` is appended at the end of the prompt.
fn build_prompt(
&self,
kind: SummaryKind,
is_merge: bool,
body: &str,
max_summary_tokens: usize,
) -> String {
match (kind, is_merge) {
// First-pass summary of raw conversation text.
(SummaryKind::Conversation, false) => format!(
"Summarise the following conversation concisely, preserving all key facts, \
decisions, and any pending or in-progress work. \
The summary MUST NOT exceed {} tokens. \
Use this format:\n\n\
**Summary:** <one-paragraph overview>\n\
**Key decisions:** <bullet list or 'none'>\n\
**Open items:** <bullet list or 'none'>\n\n\
Conversation:\n\n{}",
max_summary_tokens, body
),
// Merge of partial conversation summaries.
(SummaryKind::Conversation, true) => format!(
"Merge the following partial conversation summaries into a single concise summary. \
Deduplicate overlap, preserve chronology, and keep all concrete decisions, \
status updates, and unresolved work. The summary MUST NOT exceed {} tokens. \
Use this format:\n\n\
**Summary:** <one-paragraph overview>\n\
**Key decisions:** <bullet list or 'none'>\n\
**Open items:** <bullet list or 'none'>\n\n\
Partial summaries:\n\n{}",
max_summary_tokens, body
),
// First-pass incremental room summary of new messages.
(SummaryKind::RoomIncrement, false) => format!(
"Create an incremental room summary from the new messages below. \
Deduplicate repeated messages, clean noise, keep chronological order, and preserve \
decisions, facts, assignments/owners, unresolved questions, and concrete next steps. \
The result MUST NOT exceed {} tokens.\n\n\
Format:\n\
**Summary:** <compact overview>\n\
**Decisions:** <bullets or 'none'>\n\
**Owners:** <bullets with owner -> task or 'none'>\n\
**Open items:** <bullets or 'none'>\n\n\
New messages:\n\n{}",
max_summary_tokens, body
),
// Merge of partial room summaries.
(SummaryKind::RoomIncrement, true) => format!(
"Merge the following partial room summaries into one room summary. Deduplicate overlap, \
keep chronology, preserve decisions, facts, assignments/owners, unresolved questions, \
and concrete next steps. The result MUST NOT exceed {} tokens.\n\n\
Format:\n\
**Summary:** <compact overview>\n\
**Decisions:** <bullets or 'none'>\n\
**Owners:** <bullets with owner -> task or 'none'>\n\
**Open items:** <bullets or 'none'>\n\n\
Partial summaries:\n\n{}",
max_summary_tokens, body
),
}
}
/// Joins blocks into a single prompt body.
///
/// For a merge, every block gets a numbered `### Partial Summary N` heading
/// and blocks are separated by a blank line; otherwise the blocks are simply
/// joined with single newlines.
fn join_blocks(blocks: &[String], is_merge: bool) -> String {
    if !is_merge {
        return blocks.join("\n");
    }
    let mut joined = String::new();
    for (position, block) in blocks.iter().enumerate() {
        if position > 0 {
            joined.push_str("\n\n");
        }
        joined.push_str(&format!("### Partial Summary {}\n{}", position + 1, block));
    }
    joined
}
/// Splits `text` into two non-empty parts near its byte midpoint.
///
/// The split point is the closest character boundary at or before the byte
/// midpoint; if that would leave one side empty, the closest boundary at or
/// after the midpoint is used instead.
///
/// # Errors
/// Returns `AgentError::Internal` when the text has fewer than two characters
/// or when no split point leaves both sides non-empty.
fn split_text_in_half(text: &str) -> Result<(String, String), AgentError> {
    if text.chars().nth(1).is_none() {
        return Err(AgentError::Internal(
            "single summary block exceeds input budget and cannot be split".into(),
        ));
    }
    let total = text.len();
    let midpoint = total / 2;
    let degenerate = |at: usize| at == 0 || at >= total;
    // Walk backwards to the nearest character boundary (index 0 always is one).
    let mut split_at = midpoint;
    while !text.is_char_boundary(split_at) {
        split_at -= 1;
    }
    if degenerate(split_at) {
        // Walk forwards instead (the end of the string always is a boundary).
        split_at = midpoint;
        while !text.is_char_boundary(split_at) {
            split_at += 1;
        }
    }
    if degenerate(split_at) {
        return Err(AgentError::Internal(
            "failed to split oversized summary block".into(),
        ));
    }
    let (left, right) = text.split_at(split_at);
    Ok((left.to_string(), right.to_string()))
}
/// Estimates the token count of `text` for the configured model.
///
/// Falls back to one token per four bytes (at least one) when the counter
/// returns an error.
fn estimate_tokens(&self, text: &str) -> usize {
    match count_message_text(text, &self.model) {
        Ok(tokens) => tokens,
        Err(_) => std::cmp::max(text.len() / 4, 1),
    }
}
/// Input token budget for this service's configured model context limit.
fn safe_model_input_budget(&self) -> usize {
    let context_limit = self.model_context_limit;
    Self::safe_model_input_budget_from_limit(context_limit)
}
/// Restricts the requested summary size to the range
/// `CompactConfig::MIN_SUMMARY_TOKENS..=CompactConfig::MAX_SUMMARY_TOKENS`.
fn final_summary_budget(max_summary_tokens: usize) -> usize {
    let (lower, upper) = (
        CompactConfig::MIN_SUMMARY_TOKENS,
        CompactConfig::MAX_SUMMARY_TOKENS,
    );
    max_summary_tokens.clamp(lower, upper)
}
/// Output budget for intermediate summarisation rounds.
///
/// One eighth of the input budget, but never below `MIN_ROUND_SUMMARY_TOKENS`
/// and never above `final_budget`.
fn round_summary_budget(final_budget: usize, input_budget: usize) -> usize {
    let per_round = std::cmp::max(input_budget / 8, MIN_ROUND_SUMMARY_TOKENS);
    std::cmp::min(final_budget, per_round)
}
/// Sampling temperature used for each kind of summary.
fn temperature_for(kind: SummaryKind) -> f32 {
    let temperature = match kind {
        SummaryKind::RoomIncrement => 0.2,
        SummaryKind::Conversation => 0.3,
    };
    temperature
}
/// Derives the usable input budget from a model context limit.
///
/// Uses `DEFAULT_MODEL_CONTEXT_LIMIT` when no limit is given, scales the limit
/// by `MODEL_INPUT_RATIO_NUMERATOR / MODEL_INPUT_RATIO_DENOMINATOR` with
/// saturating arithmetic, and never returns less than 1.
fn safe_model_input_budget_from_limit(model_context_limit: Option<usize>) -> usize {
    let context_limit = match model_context_limit {
        Some(limit) => limit,
        None => DEFAULT_MODEL_CONTEXT_LIMIT,
    }
    .max(1);
    let scaled = context_limit
        .saturating_mul(MODEL_INPUT_RATIO_NUMERATOR)
        .saturating_div(MODEL_INPUT_RATIO_DENOMINATOR);
    std::cmp::max(scaled, 1)
}
/// Adds `usage` to the running `total` and records that usage was seen.
///
/// Does nothing when `usage` is `None`.
fn accumulate_usage(total: &mut TokenUsage, has_usage: &mut bool, usage: Option<TokenUsage>) {
    let Some(reported) = usage else {
        return;
    };
    total.input_tokens += reported.input_tokens;
    total.output_tokens += reported.output_tokens;
    *has_usage = true;
}
}
// Unit tests for the pure budget and text-splitting helpers of `CompactService`.
#[cfg(test)]
mod tests {
use super::super::CompactService;
// A context limit of 1000 tokens must yield an input budget of 850.
#[test]
fn room_summary_uses_eighty_five_percent_input_budget() {
assert_eq!(
CompactService::safe_model_input_budget_from_limit(Some(1000)),
850
);
}
// Splitting must lose no text and must leave both halves non-empty.
#[test]
fn oversized_text_is_split_in_half() {
let (left, right) = CompactService::split_text_in_half("abcdefgh").unwrap();
assert_eq!(format!("{}{}", left, right), "abcdefgh");
assert!(!left.is_empty());
assert!(!right.is_empty());
}
}

View File

@ -1,209 +0,0 @@
use chrono::{DateTime, Utc};
use models::rooms::{
MessageContentType, MessageSenderType, room_message::Model as RoomMessageModel,
};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;
use crate::tokent::TokenUsage;
/// Serializable snapshot of a single room message.
///
/// Can be built from a `RoomMessageModel` through the `From` impl in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MessageSummary {
/// Id of the source message.
pub id: Uuid,
/// Kind of sender that produced the message.
pub sender_type: MessageSenderType,
/// Id of the sender, when the message has one.
pub sender_id: Option<Uuid>,
/// Display name of the sender; the `From<RoomMessageModel>` impl fills it
/// with the string form of `sender_type`.
pub sender_name: String,
/// Message content.
pub content: String,
/// Content type of the message.
pub content_type: MessageContentType,
/// Tool call ID extracted from message content JSON, if present.
pub tool_call_id: Option<String>,
/// Send time of the message.
pub send_at: DateTime<Utc>,
}
/// Result of one compaction run.
// NOTE(review): field meanings below are inferred from names — confirm against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactSummary {
pub session_id: Uuid,
pub room_id: Uuid,
/// Messages kept verbatim (presumably the ones not folded into `summary`).
pub retained: Vec<MessageSummary>,
/// Summary text.
pub summary: String,
pub compacted_at: DateTime<Utc>,
/// Presumably the number of messages replaced by `summary`.
pub messages_compressed: usize,
/// Token usage for the compaction AI call. `None` if usage data was unavailable.
pub usage: Option<TokenUsage>,
}
/// Record of a summary covering a range of room messages.
// NOTE(review): `from_seq`/`to_seq` presumably bound the summarised sequence
// range and `message_count` the number of messages in it — confirm whether the
// bounds are inclusive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoomCompactRecord {
pub id: Uuid,
pub room_id: Uuid,
pub from_seq: i64,
pub to_seq: i64,
/// Summary text.
pub summary: String,
pub message_count: i32,
/// Ids of the source messages.
pub source_message_ids: Vec<Uuid>,
pub created_at: DateTime<Utc>,
}
/// Compaction context for a room: an optional summary plus retained messages.
// NOTE(review): `cutoff_seq` presumably marks the last sequence number covered
// by `summary`, and `compacted` whether any compaction applies — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoomCompactContext {
pub room_id: Uuid,
pub cutoff_seq: Option<i64>,
/// Summary text, if there is one.
pub summary: Option<String>,
/// Messages kept verbatim.
pub retained: Vec<MessageSummary>,
/// Estimated token count of this context.
pub estimated_tokens: usize,
pub compacted: bool,
}
/// How aggressively messages are compacted.
#[derive(Debug, Clone, Copy)]
pub enum CompactLevel {
    /// Retains 5 messages (see [`CompactLevel::retain_count`]).
    Light,
    /// Retains 2 messages (see [`CompactLevel::retain_count`]).
    Aggressive,
}

impl CompactLevel {
    /// Number of messages retained at this level.
    pub fn retain_count(&self) -> usize {
        match self {
            CompactLevel::Light => 5,
            CompactLevel::Aggressive => 2,
        }
    }

    /// Auto-select level based on estimated token count and config.
    ///
    /// - `Light` (retain 5): when tokens are moderately over threshold
    /// - `Aggressive` (retain 2): when tokens are severely over threshold (2x+)
    ///
    /// A `threshold` of 0 always selects `Light`.
    pub fn auto_select(estimated_tokens: usize, threshold: usize) -> Self {
        if threshold == 0 {
            return CompactLevel::Light;
        }
        // Saturate rather than overflow: `threshold * 2` panics in debug builds
        // and wraps in release builds (selecting the wrong level) for very
        // large thresholds.
        if estimated_tokens >= threshold.saturating_mul(2) {
            CompactLevel::Aggressive
        } else {
            CompactLevel::Light
        }
    }
}

/// Configuration for automatic compaction.
#[derive(Debug, Clone, Copy)]
pub struct CompactConfig {
    /// Only trigger compaction when estimated token count exceeds this.
    /// Set to 0 to disable threshold (always compact when messages > retain_count).
    pub token_threshold: usize,
    /// If true, auto-select level based on how far over the threshold we are.
    /// If false, always use `default_level`.
    pub auto_level: bool,
    /// Fallback level when `auto_level` is false.
    pub default_level: CompactLevel,
    /// Maximum tokens the summary may contain (enforced via prompt).
    pub max_summary_tokens: usize,
}

impl Default for CompactConfig {
    /// Threshold of 100 000 tokens, automatic level selection, and the largest
    /// allowed summary budget.
    fn default() -> Self {
        Self {
            token_threshold: 100_000,
            auto_level: true,
            default_level: CompactLevel::Light,
            max_summary_tokens: Self::MAX_SUMMARY_TOKENS,
        }
    }
}

impl CompactConfig {
    /// Smallest summary budget ever handed out.
    pub const MIN_SUMMARY_TOKENS: usize = 256;
    /// Largest summary budget ever handed out.
    pub const MAX_SUMMARY_TOKENS: usize = 4096;

    /// Build config from project context settings.
    ///
    /// Negative `context_window_tokens` and negative ratios are treated as 0.
    /// The token threshold is `context_window_tokens * compaction_threshold`;
    /// the summary budget comes from [`CompactConfig::summary_token_budget`].
    pub fn from_project_setting(
        context_window_tokens: i32,
        compaction_threshold: f32,
        compaction_max_summary_ratio: f32,
    ) -> Self {
        let context_window_tokens = context_window_tokens.max(0) as usize;
        let threshold = (context_window_tokens as f32 * compaction_threshold.max(0.0)) as usize;
        Self {
            token_threshold: threshold,
            auto_level: true,
            default_level: CompactLevel::Light,
            max_summary_tokens: Self::summary_token_budget(
                context_window_tokens,
                compaction_max_summary_ratio,
            ),
        }
    }

    /// Summary budget for a context window: `context_window_tokens * ratio`,
    /// clamped to `MIN_SUMMARY_TOKENS..=MAX_SUMMARY_TOKENS`.
    ///
    /// A negative ratio counts as 0, so the result is never below the minimum.
    pub fn summary_token_budget(
        context_window_tokens: usize,
        compaction_max_summary_ratio: f32,
    ) -> usize {
        let ratio = compaction_max_summary_ratio.max(0.0);
        let raw_budget = (context_window_tokens as f32 * ratio) as usize;
        // The clamp already maps a zero budget to the minimum.
        raw_budget.clamp(Self::MIN_SUMMARY_TOKENS, Self::MAX_SUMMARY_TOKENS)
    }
}
/// Result of a threshold check before deciding whether to compact.
#[derive(Debug)]
pub enum ThresholdResult {
/// Token count is below threshold — skip compaction.
/// Carries the estimate that was checked.
Skip { estimated_tokens: usize },
/// Token count exceeds threshold — compact with this level.
/// Carries the estimate that was checked and the level to compact with.
Compact {
estimated_tokens: usize,
level: CompactLevel,
},
}
impl From<RoomMessageModel> for MessageSummary {
    /// Converts a stored room message into its summary form.
    ///
    /// `sender_name` is set to the string form of the sender type and
    /// `tool_call_id` is extracted from the message content when that content
    /// is JSON with a string `tool_call_id` field.
    fn from(m: RoomMessageModel) -> Self {
        // Derive everything that only needs a borrow first, so the owned
        // fields can then be moved into the result instead of cloned.
        let sender_name = m.sender_type.to_string();
        let tool_call_id = Self::extract_tool_call_id(&m.content);
        Self {
            id: m.id,
            sender_type: m.sender_type,
            sender_id: m.sender_id,
            sender_name,
            content: m.content,
            content_type: m.content_type,
            tool_call_id,
            send_at: m.send_at,
        }
    }
}
impl MessageSummary {
    /// Reads the `tool_call_id` string from JSON message content.
    ///
    /// Returns `None` when the trimmed content is not valid JSON, when the
    /// field is missing, or when it is not a string.
    fn extract_tool_call_id(content: &str) -> Option<String> {
        let parsed: Value = serde_json::from_str(content.trim()).ok()?;
        let call_id = parsed.get("tool_call_id")?.as_str()?;
        Some(call_id.to_string())
    }
}
// Unit tests for the summary budget bounds of `CompactConfig`.
#[cfg(test)]
mod tests {
use super::CompactConfig;
// Budgets that compute to less than the minimum are raised to 256.
#[test]
fn summary_budget_has_minimum_floor() {
assert_eq!(CompactConfig::summary_token_budget(0, 0.0), 256);
assert_eq!(CompactConfig::summary_token_budget(128_000, 0.0), 256);
assert_eq!(CompactConfig::summary_token_budget(1_000, 0.01), 256);
}
// Budgets above the maximum are cut down to 4096.
#[test]
fn summary_budget_is_capped() {
assert_eq!(CompactConfig::summary_token_budget(128_000, 0.2), 4096);
}
}

View File

@ -1,63 +0,0 @@
/// Upper bound on the number of characters in one embedding chunk.
///
/// Chosen as a conservative stand-in for the 8192-token limit of
/// text-embedding-3-small: CJK text is roughly one character per token and
/// English roughly four, so 7000 characters leaves headroom for any language.
const MAX_CHUNK_CHARS: usize = 7000;

/// Splits `text` into chunks of at most [`MAX_CHUNK_CHARS`] characters.
///
/// Within each window the split is placed, in order of preference, after the
/// last blank line, after the last newline, or after the last `.`, `!` or `?`
/// that is followed by a space; if none exists the window is cut at the
/// character limit. Concatenating the chunks gives back the input.
///
/// Always returns at least one chunk (a single empty string for empty input)
/// and only ever slices on character boundaries.
pub fn chunk_text(text: &str) -> Vec<String> {
    if text.is_empty() {
        return vec![String::new()];
    }
    // Byte length bounds the character count, so this is a cheap early exit.
    if text.len() <= MAX_CHUNK_CHARS {
        return vec![text.to_owned()];
    }
    // Byte offset of every character, so windows can be measured in characters.
    let offsets: Vec<usize> = text.char_indices().map(|(pos, _)| pos).collect();
    let char_total = offsets.len();
    let mut pieces = Vec::new();
    let mut cursor = 0usize;
    while cursor < char_total {
        let window_start = offsets[cursor];
        let window_end_char = char_total.min(cursor + MAX_CHUNK_CHARS);
        if window_end_char >= char_total {
            // The rest fits in one window.
            pieces.push(text[window_start..].to_owned());
            break;
        }
        let window_end = offsets[window_end_char];
        let window = &text[window_start..window_end];
        let cut = window
            .rfind("\n\n")
            .map(|pos| pos + 2)
            .or_else(|| window.rfind('\n').map(|pos| pos + 1))
            .or_else(|| {
                [". ", "! ", "? "]
                    .iter()
                    .find_map(|mark| window.rfind(mark).map(|pos| pos + 1))
            });
        match cut {
            Some(offset) => {
                let piece_end = window_start + offset;
                pieces.push(text[window_start..piece_end].to_owned());
                // First character starting at or after the cut; always past `cursor`
                // because the cut offset is at least 1.
                cursor = offsets.partition_point(|&start| start < piece_end);
            }
            None => {
                pieces.push(window.to_owned());
                cursor = window_end_char;
            }
        }
    }
    pieces
}

View File

@ -1,291 +0,0 @@
use rig::client::EmbeddingsClient;
use rig::embeddings::EmbeddingModel;
use rig::providers::openai::Client as OpenAiClient;
use serde::{Deserialize, Serialize};
use crate::embed::qdrant::QdrantClient;
/// Combines an OpenAI-compatible embedding client with a Qdrant client.
pub struct EmbedClient {
// Used to create embedding models and compute vectors.
openai: OpenAiClient,
// Used for collection management, upserts, searches and deletes.
qdrant: QdrantClient,
}
/// A point to be stored: id, embedding vector and payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbedVector {
/// Point id as a string.
pub id: String,
/// Embedding vector.
pub vector: Vec<f32>,
/// Payload stored alongside the vector.
pub payload: EmbedPayload,
}
/// Payload attached to an embedded point.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbedPayload {
/// Entity kind; values used in this codebase include "issue", "repo",
/// "skill", "memory" and "repo_tag".
pub entity_type: String,
/// Id of the entity the point belongs to.
pub entity_id: String,
/// Text associated with the point.
pub text: String,
/// Optional free-form JSON metadata; omitted from serialized output when
/// `None` and defaulted when missing on input.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub extra: Option<serde_json::Value>,
}
/// One hit returned by a vector search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
/// Id of the matching point.
pub id: String,
/// Score reported for the match.
pub score: f32,
/// Payload stored with the matching point.
pub payload: EmbedPayload,
}
impl EmbedClient {
/// Creates a client from an already configured OpenAI client and Qdrant client.
pub fn new(openai: OpenAiClient, qdrant: QdrantClient) -> Self {
Self { openai, qdrant }
}
/// Embeds a single text with the named model and returns the vector as `f32`.
///
/// # Errors
/// Returns `AgentError::OpenAi` when the embedding request fails or when the
/// provider returns no embedding.
pub async fn embed_text(&self, text: &str, model: &str) -> crate::Result<Vec<f32>> {
    let embedder = self.openai.embedding_model(model);
    let embeddings = embedder
        .embed_texts(vec![text.to_string()])
        .await
        .map_err(|e| crate::AgentError::OpenAi(format!("embedding failed: {}", e)))?;
    let Some(first) = embeddings.first() else {
        return Err(crate::AgentError::OpenAi("no embedding returned".into()));
    };
    Ok(first.vec.iter().map(|component| *component as f32).collect())
}
/// Embeds several texts in one request.
///
/// The result always has exactly `texts.len()` entries, in input order. If the
/// provider returns fewer embeddings the missing entries are empty vectors;
/// surplus embeddings are dropped. Both situations are logged as warnings
/// rather than reported as errors.
///
/// # Errors
/// Returns `AgentError::OpenAi` when the embedding request fails.
pub async fn embed_batch(&self, texts: &[String], model: &str) -> crate::Result<Vec<Vec<f32>>> {
    let embedder = self.openai.embedding_model(model);
    let embeddings = embedder
        .embed_texts(texts.to_vec())
        .await
        .map_err(|e| crate::AgentError::OpenAi(format!("embedding batch failed: {}", e)))?;
    tracing::debug!(
        input_count = texts.len(),
        returned_count = embeddings.len(),
        "embed_batch: API returned"
    );
    let expected = texts.len();
    if embeddings.len() > expected {
        tracing::warn!(
            idx = expected,
            "embed_batch: provider returned more embeddings than requested"
        );
    }
    let mut result: Vec<Vec<f32>> = embeddings
        .into_iter()
        .take(expected)
        .map(|embedding| embedding.vec.iter().map(|v| *v as f32).collect())
        .collect();
    // Pad with empty vectors so positions keep lining up with `texts`.
    result.resize_with(expected, Vec::new);
    // Check for empty results
    let empty_count = result.iter().filter(|vector| vector.is_empty()).count();
    if empty_count > 0 {
        tracing::warn!(
            empty_count = empty_count,
            total = texts.len(),
            "embed_batch: some embeddings returned empty vectors"
        );
    }
    Ok(result)
}
/// Upserts `points` through the Qdrant client's `upsert_points`.
pub async fn upsert(&self, points: Vec<EmbedVector>) -> crate::Result<()> {
self.qdrant.upsert_points(points).await
}
/// Upsert points into a named collection (bypasses entity_type routing).
///
/// Delegates to the Qdrant client's `upsert_to_collection` with
/// `collection_name` used as given.
pub async fn upsert_to_collection(
&self,
collection_name: &str,
points: Vec<EmbedVector>,
) -> crate::Result<()> {
self.qdrant
.upsert_to_collection(collection_name, points)
.await
}
/// Embeds `query` with `model` and searches vectors of `entity_type`,
/// returning at most `limit` results.
///
/// Fails if either the embedding call or the Qdrant search fails.
pub async fn search(
&self,
query: &str,
entity_type: &str,
model: &str,
limit: usize,
) -> crate::Result<Vec<SearchResult>> {
let vector = self.embed_text(query, model).await?;
self.qdrant.search(&vector, entity_type, limit).await
}
/// Same as `search`, but additionally passes a Qdrant `filter` to the search.
///
/// Fails if either the embedding call or the Qdrant search fails.
pub async fn search_with_filter(
&self,
query: &str,
entity_type: &str,
model: &str,
limit: usize,
filter: qdrant_client::qdrant::Filter,
) -> crate::Result<Vec<SearchResult>> {
let vector = self.embed_text(query, model).await?;
self.qdrant
.search_with_filter(&vector, entity_type, limit, filter)
.await
}
/// Deletes the points of `entity_type` that belong to `entity_id` by
/// delegating to the Qdrant client's `delete_by_filter`.
pub async fn delete_by_entity_id(
&self,
entity_type: &str,
entity_id: &str,
) -> crate::Result<()> {
self.qdrant.delete_by_filter(entity_type, entity_id).await
}
/// Ensures the collection for `entity_type` exists with the given vector
/// `dimensions` (delegates to the Qdrant client).
pub async fn ensure_collection(&self, entity_type: &str, dimensions: u64) -> crate::Result<()> {
self.qdrant.ensure_collection(entity_type, dimensions).await
}
/// Ensures the skill collection exists with the given vector `dimensions`
/// (delegates to the Qdrant client).
pub async fn ensure_skill_collection(&self, dimensions: u64) -> crate::Result<()> {
self.qdrant.ensure_skill_collection(dimensions).await
}
/// Ensure a room-specific memory collection exists.
///
/// Delegates to the Qdrant client with the same `project_name`, `room_id`
/// and vector `dimensions`.
pub async fn ensure_room_memory_collection(
&self,
project_name: &str,
room_id: &str,
dimensions: u64,
) -> crate::Result<()> {
self.qdrant
.ensure_room_memory_collection(project_name, room_id, dimensions)
.await
}
/// Embeds one conversation message and stores it in the room's memory collection.
///
/// The collection name comes from `QdrantClient::room_memory_collection_name`,
/// and the collection is created on demand with the dimension of the freshly
/// computed vector. The stored payload has entity type "memory", the room id
/// as entity id, and `user_id` as its only extra field.
///
/// NOTE(review): no `seq` is written here, so `search_memories_after_seq` with
/// a cutoff will filter these points out — confirm that is intended.
///
/// # Errors
/// Fails if embedding, collection creation or the upsert fails.
pub async fn embed_memory(
    &self,
    id: &str,
    text: &str,
    project_name: &str,
    room_id: &str,
    user_id: Option<&str>,
    model: &str,
) -> crate::Result<()> {
    // The vector is computed first because its length decides the collection dimension.
    let vector = self.embed_text(text, model).await?;
    let collection =
        crate::embed::qdrant::QdrantClient::room_memory_collection_name(project_name, room_id);
    let dimensions = vector.len() as u64;
    self.qdrant
        .ensure_room_memory_collection(project_name, room_id, dimensions)
        .await?;
    let payload = EmbedPayload {
        entity_type: "memory".to_string(),
        entity_id: room_id.to_string(),
        text: text.to_string(),
        extra: Some(serde_json::json!({ "user_id": user_id })),
    };
    let point = EmbedVector {
        id: id.to_string(),
        vector,
        payload,
    };
    self.qdrant
        .upsert_to_collection(&collection, vec![point])
        .await
}
/// Semantic search over one room's memory collection.
///
/// The query is embedded with `model`, the room collection is ensured to exist
/// with `dimensions`, and the collection is then searched directly for up to
/// `limit` hits.
///
/// # Errors
/// Fails if embedding, collection creation or the search fails.
pub async fn search_memories(
    &self,
    query: &str,
    model: &str,
    project_name: &str,
    room_id: &str,
    limit: usize,
    dimensions: u64,
) -> crate::Result<Vec<SearchResult>> {
    let query_vector = self.embed_text(query, model).await?;
    let collection_name =
        crate::embed::qdrant::QdrantClient::room_memory_collection_name(project_name, room_id);
    // Searching a collection that was never created would fail, so create it if needed.
    self.qdrant
        .ensure_room_memory_collection(project_name, room_id, dimensions)
        .await?;
    let hits = self
        .qdrant
        .search_collection(&collection_name, &query_vector, limit)
        .await;
    hits
}
/// Like `search_memories`, but optionally keeps only hits newer than a sequence number.
///
/// With `after_seq` set, four times `limit` candidates are fetched and only
/// those whose payload `extra.seq` is an integer greater than the cutoff are
/// kept; hits without a `seq` are dropped. At most `limit` hits are returned.
///
/// # Errors
/// Propagates any error from `search_memories`.
pub async fn search_memories_after_seq(
    &self,
    query: &str,
    model: &str,
    project_name: &str,
    room_id: &str,
    limit: usize,
    dimensions: u64,
    after_seq: Option<i64>,
) -> crate::Result<Vec<SearchResult>> {
    // Over-fetch when filtering, since some candidates will be discarded.
    let fetch_limit = match after_seq {
        Some(_) => limit.saturating_mul(4).max(limit),
        None => limit,
    };
    let candidates = self
        .search_memories(query, model, project_name, room_id, fetch_limit, dimensions)
        .await?;
    let kept = candidates
        .into_iter()
        .filter(|hit| {
            let Some(cutoff) = after_seq else {
                return true;
            };
            hit.payload
                .extra
                .as_ref()
                .and_then(|extra| extra.get("seq"))
                .and_then(|seq| seq.as_i64())
                .map_or(false, |seq| seq > cutoff)
        })
        .take(limit)
        .collect();
    Ok(kept)
}
/// Embeds a skill and upserts it as a single point.
///
/// The embedded text is `"{name}: {description} {content}"`. The payload has
/// entity type "skill", the project uuid as entity id, the embedded text, and
/// the name and description as extra fields.
///
/// # Errors
/// Fails if embedding or the upsert fails.
pub async fn embed_skill(
    &self,
    id: &str,
    name: &str,
    description: &str,
    content: &str,
    project_uuid: &str,
    model: &str,
) -> crate::Result<()> {
    let text = format!("{}: {} {}", name, description, content);
    let vector = self.embed_text(&text, model).await?;
    let payload = EmbedPayload {
        entity_type: "skill".to_string(),
        entity_id: project_uuid.to_string(),
        text,
        extra: Some(serde_json::json!({ "name": name, "description": description })),
    };
    let point = EmbedVector {
        id: id.to_string(),
        vector,
        payload,
    };
    self.qdrant.upsert_points(vec![point]).await
}
/// Semantic search over skills, restricted to one project.
///
/// Fetches `limit + 1` candidates from the skill search, keeps those whose
/// payload entity id equals `project_uuid`, and returns at most `limit`.
///
/// NOTE(review): the project filter is applied after the fetch, so fewer than
/// `limit` hits may come back when other projects' skills rank higher —
/// confirm whether a server-side filter is wanted.
///
/// # Errors
/// Fails if embedding or the search fails.
pub async fn search_skills(
    &self,
    query: &str,
    model: &str,
    project_uuid: &str,
    limit: usize,
) -> crate::Result<Vec<SearchResult>> {
    let query_vector = self.embed_text(query, model).await?;
    let candidates = self.qdrant.search_skill(&query_vector, limit + 1).await?;
    let matching = candidates
        .into_iter()
        .filter(|hit| hit.payload.entity_id == project_uuid)
        .take(limit)
        .collect();
    Ok(matching)
}
}

View File

@ -1,24 +0,0 @@
use async_trait::async_trait;
/// Trait for entities that can be embedded as vectors into Qdrant.
#[async_trait]
pub trait Embeddable {
/// Entity kind, stored as the payload's `entity_type`.
fn entity_type(&self) -> &'static str;
/// Text that is sent to the embedding model and stored in the payload.
fn to_text(&self) -> String;
/// Id of the entity, used as point id and as the payload's `entity_id`.
fn entity_id(&self) -> String;
}
/// Input struct for batch memory embedding into per-room Qdrant collections.
#[derive(Debug, Clone)]
pub struct EmbedMemoryInput {
/// Id of the message; used as the point id (with a chunk suffix when the
/// content is split into several chunks).
pub message_id: String,
/// Sequence number of the message, stored in the payload as `seq`.
pub seq: i64,
/// Message text; chunked before embedding.
pub content: String,
/// Project name, passed on when resolving the room collection.
pub project_name: String,
/// Room id; determines the target collection and is stored as entity id.
pub room_id: String,
/// Id of the user, if any; stored in the payload as `user_id`.
pub user_id: Option<String>,
/// Sender type as a string; stored in the payload as `sender_type`.
pub sender_type: String,
}
/// Input struct for batch tag embedding.
pub use models::TagEmbedInput;

View File

@ -1,369 +0,0 @@
use std::collections::HashMap;
use super::chunk::chunk_text;
use super::client::{EmbedPayload, EmbedVector};
use super::embeddable::{EmbedMemoryInput, Embeddable};
/// Embedding and upsert operations for entity vectors in Qdrant.
impl super::EmbedService {
/// Embeds one issue and upserts it as a single point.
///
/// The embedded text is the title, followed by a blank line and the body when
/// the body is present and non-empty. The issue id serves as both point id and
/// entity id; the payload entity type is "issue".
///
/// # Errors
/// Fails if embedding or the upsert fails.
pub async fn embed_issue(
    &self,
    id: &str,
    title: &str,
    body: Option<&str>,
) -> crate::Result<()> {
    let text = if let Some(details) = body.filter(|b| !b.is_empty()) {
        format!("{}\n\n{}", title, details)
    } else {
        title.to_string()
    };
    tracing::debug!(issue_id = %id, text_len = text.len(), "embed_issue: calling embedding API");
    let vector = self.client.embed_text(&text, &self.model_name).await?;
    tracing::debug!(issue_id = %id, vec_dim = vector.len(), "embed_issue: embedding done");
    let payload = EmbedPayload {
        entity_type: "issue".to_string(),
        entity_id: id.to_string(),
        text,
        extra: None,
    };
    let point = EmbedVector {
        id: id.to_string(),
        vector,
        payload,
    };
    self.client.upsert(vec![point]).await?;
    tracing::info!(issue_id = %id, "embed_issue: upsert complete");
    Ok(())
}
/// Embeds one repository and upserts it as a single point.
///
/// The embedded text is `"{name}: {description}"` when a non-empty description
/// is given, otherwise just the name. The repo id serves as both point id and
/// entity id; the payload entity type is "repo".
///
/// # Errors
/// Fails if embedding or the upsert fails.
pub async fn embed_repo(
    &self,
    id: &str,
    name: &str,
    description: Option<&str>,
) -> crate::Result<()> {
    let text = if let Some(summary) = description.filter(|d| !d.is_empty()) {
        format!("{}: {}", name, summary)
    } else {
        name.to_string()
    };
    tracing::debug!(repo_id = %id, text_len = text.len(), "embed_repo: calling embedding API");
    let vector = self.client.embed_text(&text, &self.model_name).await?;
    tracing::debug!(repo_id = %id, vec_dim = vector.len(), "embed_repo: embedding done");
    let payload = EmbedPayload {
        entity_type: "repo".to_string(),
        entity_id: id.to_string(),
        text,
        extra: None,
    };
    let point = EmbedVector {
        id: id.to_string(),
        vector,
        payload,
    };
    self.client.upsert(vec![point]).await?;
    tracing::info!(repo_id = %id, "embed_repo: upsert complete");
    Ok(())
}
/// Embeds a batch of embeddable items and upserts them in one call.
///
/// Each item's `to_text()` is embedded; its `entity_id()` serves as point id
/// and payload entity id, and its `entity_type()` as payload entity type.
/// An empty `items` list is a no-op.
///
/// # Errors
/// Fails if the batch embedding or the upsert fails.
pub async fn embed_issues<T: Embeddable + Send + Sync>(
    &self,
    items: Vec<T>,
) -> crate::Result<()> {
    if items.is_empty() {
        return Ok(());
    }
    let texts: Vec<String> = items.iter().map(|i| i.to_text()).collect();
    tracing::debug!(count = texts.len(), "embed_issues: calling embed_batch");
    let embeddings = self.client.embed_batch(&texts, &self.model_name).await?;
    tracing::debug!(count = embeddings.len(), "embed_issues: batch done");
    // Reuse the texts that were just embedded instead of rebuilding them per item.
    let points: Vec<EmbedVector> = items
        .into_iter()
        .zip(texts)
        .zip(embeddings)
        .map(|((item, text), vector)| {
            let entity_id = item.entity_id();
            EmbedVector {
                id: entity_id.clone(),
                vector,
                payload: EmbedPayload {
                    entity_type: item.entity_type().to_string(),
                    entity_id,
                    text,
                    extra: None,
                },
            }
        })
        .collect();
    let count = points.len();
    self.client.upsert(points).await?;
    tracing::info!(count = count, "embed_issues: upsert complete");
    Ok(())
}
/// Embeds a skill, splitting long content into several chunk points.
///
/// Content that fits one chunk is embedded through the client's `embed_skill`
/// under the skill id. Longer content is chunked; every chunk is embedded as
/// `"{name}: {description} {chunk}"` and stored under `"{id}:chunk:{index}"`
/// with the raw chunk as payload text and the name, description, chunk index
/// and chunk total as extra fields.
///
/// # Errors
/// Fails if embedding or the upsert fails.
pub async fn embed_skill(
    &self,
    skill_id: i64,
    name: &str,
    description: Option<&str>,
    content: &str,
    project_uuid: &str,
) -> crate::Result<()> {
    let desc = description.unwrap_or_default();
    let id = skill_id.to_string();
    tracing::debug!(skill_id = %skill_id, name = %name, content_len = content.len(), "embed_skill: starting");
    let texts = chunk_text(content);
    let total_chunks = texts.len();
    tracing::debug!(skill_id = %skill_id, chunks = texts.len(), "embed_skill: chunked");
    if total_chunks == 1 {
        self.client
            .embed_skill(&id, name, desc, content, project_uuid, &self.model_name)
            .await?;
    } else {
        let mut full_texts: Vec<String> = Vec::with_capacity(total_chunks);
        for chunk in &texts {
            full_texts.push(format!("{}: {} {}", name, desc, chunk));
        }
        tracing::debug!(skill_id = %skill_id, "embed_skill: calling embed_batch");
        let embeddings = self
            .client
            .embed_batch(&full_texts, &self.model_name)
            .await?;
        // `embed_batch` returns one vector per input, so chunks and vectors pair up.
        let points: Vec<EmbedVector> = texts
            .iter()
            .zip(embeddings)
            .enumerate()
            .map(|(index, (chunk, vector))| EmbedVector {
                id: format!("{}:chunk:{}", id, index),
                vector,
                payload: EmbedPayload {
                    entity_type: "skill".to_string(),
                    entity_id: project_uuid.to_string(),
                    text: chunk.clone(),
                    extra: Some(serde_json::json!({
                        "name": name,
                        "description": desc,
                        "chunk_index": index,
                        "total_chunks": total_chunks,
                    })),
                },
            })
            .collect();
        self.client.upsert(points).await?;
    }
    tracing::info!(skill_id = %skill_id, chunks = texts.len(), "embed_skill: complete");
    Ok(())
}
/// Embeds an issue, splitting long text into several chunk points.
///
/// Text that fits one chunk is handled by `embed_issue`. Otherwise every chunk
/// is embedded and stored under `"{id}:chunk:{index}"` with the chunk as
/// payload text and the chunk index and chunk total as extra fields.
///
/// # Errors
/// Fails if embedding or the upsert fails.
pub async fn embed_issue_chunked(
    &self,
    id: &str,
    title: &str,
    body: Option<&str>,
) -> crate::Result<()> {
    let text = if let Some(details) = body.filter(|b| !b.is_empty()) {
        format!("{}\n\n{}", title, details)
    } else {
        title.to_string()
    };
    let chunks = chunk_text(&text);
    let total_chunks = chunks.len();
    if total_chunks == 1 {
        return self.embed_issue(id, title, body).await;
    }
    let embeddings = self.client.embed_batch(&chunks, &self.model_name).await?;
    // `embed_batch` returns one vector per input, so chunks and vectors pair up.
    let mut points: Vec<EmbedVector> = Vec::with_capacity(total_chunks);
    for (index, (chunk, vector)) in chunks.into_iter().zip(embeddings).enumerate() {
        points.push(EmbedVector {
            id: format!("{}:chunk:{}", id, index),
            vector,
            payload: EmbedPayload {
                entity_type: "issue".to_string(),
                entity_id: id.to_string(),
                text: chunk,
                extra: Some(serde_json::json!({
                    "chunk_index": index,
                    "total_chunks": total_chunks,
                })),
            },
        });
    }
    self.client.upsert(points).await
}
pub async fn embed_memories_batch(&self, messages: Vec<EmbedMemoryInput>) -> crate::Result<()> {
if messages.is_empty() {
return Ok(());
}
let mut by_room: HashMap<String, Vec<(EmbedMemoryInput, Vec<String>)>> = HashMap::new();
for msg in messages {
let chunks = chunk_text(&msg.content);
if chunks.is_empty() || chunks.iter().all(|c| c.trim().is_empty()) {
continue;
}
let collection = super::qdrant::QdrantClient::room_memory_collection_name(
&msg.project_name,
&msg.room_id,
);
by_room.entry(collection).or_default().push((msg, chunks));
}
for (collection, entries) in &by_room {
let all_texts: Vec<String> = entries
.iter()
.flat_map(|(_, chunks)| chunks.iter().cloned())
.collect();
if all_texts.is_empty() {
continue;
}
let embeddings = self
.client
.embed_batch(&all_texts, &self.model_name)
.await?;
if let Some((first, _)) = entries.first() {
let _ = self
.client
.ensure_room_memory_collection(
&first.project_name,
&first.room_id,
self.dimensions,
)
.await;
}
let mut points = Vec::new();
let mut embed_idx = 0;
for (msg, chunks) in entries {
for (chunk_i, chunk) in chunks.iter().enumerate() {
if embed_idx >= embeddings.len() {
break;
}
let point_id = if chunks.len() == 1 {
msg.message_id.clone()
} else {
format!("{}:chunk:{}", msg.message_id, chunk_i)
};
points.push(EmbedVector {
id: point_id,
vector: embeddings[embed_idx].clone(),
payload: EmbedPayload {
entity_type: "memory".to_string(),
entity_id: msg.room_id.clone(),
text: chunk.clone(),
extra: serde_json::json!({
"message_id": msg.message_id,
"seq": msg.seq,
"user_id": msg.user_id,
"sender_type": msg.sender_type,
"chunk_index": if chunks.len() > 1 {
Some(chunk_i)
} else {
None
},
"total_chunks": if chunks.len() > 1 {
Some(chunks.len())
} else {
None
},
})
.into(),
},
});
embed_idx += 1;
}
}
if let Err(e) = self.client.upsert_to_collection(collection, points).await {
tracing::warn!(collection = %collection, error = %e, "batch memory embed failed");
}
}
Ok(())
}
pub async fn embed_tags_batch(
&self,
tags: Vec<super::embeddable::TagEmbedInput>,
) -> crate::Result<()> {
if tags.is_empty() {
return Ok(());
}
let texts: Vec<String> = tags
.iter()
.map(|t| {
if let Some(ref desc) = t.description {
if !desc.is_empty() {
format!("{}: {}", t.name, desc)
} else {
t.name.clone()
}
} else {
t.name.clone()
}
})
.collect();
let embeddings = self.client.embed_batch(&texts, &self.model_name).await?;
let points: Vec<EmbedVector> = tags
.into_iter()
.zip(embeddings.into_iter())
.map(|(tag, vector)| {
let point_id = format!("{}:{}", tag.repo_id, tag.name);
EmbedVector {
id: point_id,
vector,
payload: EmbedPayload {
entity_type: "repo_tag".to_string(),
entity_id: tag.project_id.clone(),
text: tag.name.clone(),
extra: serde_json::json!({
"repo_id": tag.repo_id,
"repo_name": tag.repo_name,
"tag_name": tag.name,
"description": tag.description,
})
.into(),
},
}
})
.collect();
self.client.upsert(points).await
}
/// Embeds a single message into its room memory collection.
///
/// Delegates to the client's `embed_memory`, supplying this service's
/// configured embedding model name.
pub async fn embed_memory(
&self,
message_id: &str,
text: &str,
project_name: &str,
room_id: &str,
user_id: Option<&str>,
) -> crate::Result<()> {
self.client
.embed_memory(
message_id,
text,
project_name,
room_id,
user_id,
&self.model_name,
)
.await
}
}

View File

@ -1,90 +0,0 @@
pub mod chunk;
pub mod client;
pub mod embeddable;
pub mod entity_embed;
pub mod qdrant;
pub mod search;
pub use client::{EmbedClient, EmbedPayload, EmbedVector, SearchResult};
pub use embeddable::{EmbedMemoryInput, Embeddable, TagEmbedInput};
pub use qdrant::QdrantClient;
use std::sync::Arc;
/// Embedding service: a shared embed client plus the database handle and the
/// embedding model settings.
#[derive(Clone)]
pub struct EmbedService {
// Shared embedding/Qdrant client.
client: Arc<EmbedClient>,
// Database connection, exposed through `db()`.
db: sea_orm::DatabaseConnection,
// Name of the embedding model used for all embedding calls.
model_name: String,
// Vector dimension used when ensuring collections exist.
dimensions: u64,
}
impl EmbedService {
    /// Creates a service that owns `client` behind an `Arc`.
    pub fn new(
        client: EmbedClient,
        db: sea_orm::DatabaseConnection,
        model_name: String,
        dimensions: u64,
    ) -> Self {
        let client = Arc::new(client);
        Self {
            client,
            db,
            model_name,
            dimensions,
        }
    }

    /// Ensures the issue, repo, skill and repo-tag collections exist, in that
    /// order, using the configured vector dimension.
    ///
    /// # Errors
    /// Stops at, and returns, the first failure.
    pub async fn ensure_collections(&self) -> crate::Result<()> {
        let dimensions = self.dimensions;
        for entity_type in ["issue", "repo"] {
            self.client
                .ensure_collection(entity_type, dimensions)
                .await?;
        }
        self.client.ensure_skill_collection(dimensions).await?;
        self.client.ensure_collection("repo_tag", dimensions).await?;
        Ok(())
    }

    /// Database connection used by this service.
    pub fn db(&self) -> &sea_orm::DatabaseConnection {
        &self.db
    }

    /// Shared embed client.
    pub fn client(&self) -> &Arc<EmbedClient> {
        &self.client
    }

    /// Name of the configured embedding model.
    pub fn model_name(&self) -> &str {
        self.model_name.as_str()
    }

    /// Configured vector dimension.
    pub fn dimensions(&self) -> u64 {
        self.dimensions
    }
}
/// Builds an [`EmbedClient`] from application configuration.
///
/// Reads the embedding base URL, the embedding API key and the Qdrant URL
/// (all required) plus the optional Qdrant API key, then constructs the
/// OpenAI-compatible client and the Qdrant client.
///
/// # Errors
/// Returns `AgentError::Internal` when a required setting cannot be read or
/// the OpenAI client cannot be built, and propagates the Qdrant client error.
pub async fn new_embed_client(config: &config::AppConfig) -> crate::Result<EmbedClient> {
    let embed_base_url = config
        .get_embed_model_base_url()
        .map_err(|err| crate::AgentError::Internal(err.to_string()))?;
    let embed_api_key = config
        .get_embed_model_api_key()
        .map_err(|err| crate::AgentError::Internal(err.to_string()))?;
    let qdrant_url = config
        .get_qdrant_url()
        .map_err(|err| crate::AgentError::Internal(err.to_string()))?;
    let qdrant_api_key = config.get_qdrant_api_key();
    let openai = match rig::providers::openai::Client::builder()
        .api_key(&embed_api_key)
        .base_url(&embed_base_url)
        .build()
    {
        Ok(client) => client,
        Err(e) => {
            return Err(crate::AgentError::Internal(format!(
                "failed to build rig openai client: {}",
                e
            )));
        }
    };
    let qdrant = QdrantClient::new(&qdrant_url, qdrant_api_key.as_deref()).await?;
    Ok(EmbedClient::new(openai, qdrant))
}

View File

@ -1,373 +0,0 @@
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{
Condition, CreateCollectionBuilder, DeletePointsBuilder, Distance, FieldCondition, Filter,
Match, PointStruct, SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, Vectors,
condition::ConditionOneOf, r#match::MatchValue, point_id::PointIdOptions, value,
};
use std::collections::HashMap;
use std::sync::Arc;
use super::client::{EmbedPayload, SearchResult};
use crate::embed::client::EmbedVector;
/// Thin wrapper around the `qdrant_client::Qdrant` handle that maps between
/// this crate's embed types (`EmbedVector`, `EmbedPayload`, `SearchResult`)
/// and Qdrant points.
pub struct QdrantClient {
/// Underlying Qdrant client, reference-counted so clones share it.
inner: Arc<Qdrant>,
}
impl Clone for QdrantClient {
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
}
impl QdrantClient {
    /// Builds a client for the Qdrant server at `url`, attaching `api_key`
    /// when one is given.
    ///
    /// # Errors
    /// Returns `AgentError::Qdrant` if the client cannot be built.
    pub async fn new(url: &str, api_key: Option<&str>) -> crate::Result<Self> {
        let mut builder = Qdrant::from_url(url);
        if let Some(key) = api_key {
            builder = builder.api_key(key);
        }
        // `QdrantError` converts into `AgentError::Qdrant` through the crate's
        // `From` impl, so `?` yields the same error as an explicit `map_err`.
        let inner = builder.build()?;
        Ok(Self {
            inner: Arc::new(inner),
        })
    }

    /// Name of the collection that stores vectors for `entity_type`
    /// (`embed_<entity_type>`).
    fn collection_name(entity_type: &str) -> String {
        format!("embed_{}", entity_type)
    }

    /// Generate the collection name for a room's memory vectors.
    ///
    /// The name is `room_memory_<room_id>` with every `-` in `room_id`
    /// replaced by `_`. `project_name` is accepted but currently unused.
    pub fn room_memory_collection_name(project_name: &str, room_id: &str) -> String {
        let _ = project_name;
        format!("room_memory_{}", room_id.replace('-', "_"))
    }

    /// Ensures the collection for `entity_type` exists with vectors of size
    /// `dimensions`.
    pub async fn ensure_collection(&self, entity_type: &str, dimensions: u64) -> crate::Result<()> {
        let name = Self::collection_name(entity_type);
        self.ensure_collection_named(&name, dimensions).await
    }

    /// Creates the collection `name` (cosine distance, `dimensions`-sized
    /// vectors) unless it already exists.
    async fn ensure_collection_named(&self, name: &str, dimensions: u64) -> crate::Result<()> {
        if self.inner.collection_exists(name).await? {
            return Ok(());
        }
        let create_collection = CreateCollectionBuilder::new(name)
            .vectors_config(VectorParamsBuilder::new(dimensions, Distance::Cosine))
            .build();
        self.inner.create_collection(create_collection).await?;
        Ok(())
    }

    /// Ensure a room-specific memory collection exists.
    pub async fn ensure_room_memory_collection(
        &self,
        project_name: &str,
        room_id: &str,
        dimensions: u64,
    ) -> crate::Result<()> {
        let name = Self::room_memory_collection_name(project_name, room_id);
        self.ensure_collection_named(&name, dimensions).await
    }

    /// Upserts `points` into the entity collection selected by the FIRST
    /// point's `entity_type`.
    ///
    /// An empty batch is a no-op.
    ///
    /// # Errors
    /// Returns `AgentError::Qdrant` without contacting Qdrant when any point
    /// carries an empty vector, and propagates Qdrant failures otherwise.
    pub async fn upsert_points(&self, points: Vec<EmbedVector>) -> crate::Result<()> {
        let Some(first) = points.first() else {
            return Ok(());
        };
        // NOTE(review): every point is routed by the first point's entity
        // type; callers are presumably expected to pass homogeneous batches.
        let collection_name = Self::collection_name(&first.payload.entity_type);

        // Reject empty vectors — they cause Qdrant to reject the entire batch
        let empty_vectors = points.iter().filter(|p| p.vector.is_empty()).count();
        if empty_vectors > 0 {
            tracing::error!(
                empty_count = empty_vectors,
                total = points.len(),
                "upsert_points: REJECTING points with empty vectors"
            );
            return Err(crate::AgentError::Qdrant(format!(
                "refusing to upsert {} points with empty vectors",
                empty_vectors
            )));
        }
        self.upsert_to_collection(&collection_name, points).await
    }

    /// Upsert points into a specific collection by name.
    ///
    /// Each point's payload stores `entity_type`, `entity_id` and `text` as
    /// strings; `extra`, when present, is stored as a JSON-encoded string.
    /// An empty batch is a no-op.
    pub async fn upsert_to_collection(
        &self,
        collection_name: &str,
        points: Vec<EmbedVector>,
    ) -> crate::Result<()> {
        if points.is_empty() {
            return Ok(());
        }
        let qdrant_points: Vec<PointStruct> = points
            .into_iter()
            .map(|p| {
                let mut payload: HashMap<String, qdrant_client::qdrant::Value> =
                    HashMap::with_capacity(4);
                payload.insert("entity_type".to_string(), p.payload.entity_type.into());
                payload.insert("entity_id".to_string(), p.payload.entity_id.into());
                payload.insert("text".to_string(), p.payload.text.into());
                if let Some(extra) = p.payload.extra {
                    // Best effort: a serialization failure still stores an
                    // empty string (read back as "no extra"), but is logged
                    // instead of disappearing silently.
                    let extra_str = match serde_json::to_string(&extra) {
                        Ok(encoded) => encoded,
                        Err(e) => {
                            tracing::warn!(
                                error = %e,
                                "upsert_to_collection: failed to serialize extra payload"
                            );
                            String::new()
                        }
                    };
                    payload.insert(
                        "extra".to_string(),
                        qdrant_client::qdrant::Value {
                            kind: Some(qdrant_client::qdrant::value::Kind::StringValue(extra_str)),
                        },
                    );
                }
                PointStruct::new(p.id, Vectors::from(p.vector), payload)
            })
            .collect();
        let upsert = UpsertPointsBuilder::new(collection_name, qdrant_points).build();
        self.inner.upsert_points(upsert).await?;
        Ok(())
    }

    /// Returns the string held by `value`, or an empty string for any other
    /// (or missing) kind.
    fn extract_string(value: &qdrant_client::qdrant::Value) -> String {
        match &value.kind {
            Some(value::Kind::StringValue(s)) => s.clone(),
            _ => String::new(),
        }
    }

    /// Reads the string payload field `key`, defaulting to an empty string
    /// when the field is absent or not a string.
    fn payload_string(
        payload: &HashMap<String, qdrant_client::qdrant::Value>,
        key: &str,
    ) -> String {
        payload
            .get(key)
            .map(Self::extract_string)
            .unwrap_or_default()
    }

    /// Converts one scored Qdrant point into a [`SearchResult`].
    ///
    /// Missing string fields become empty strings; `extra` is `None` unless
    /// it is a non-empty string containing valid JSON; a missing point id
    /// becomes an empty string.
    fn point_to_result(point: qdrant_client::qdrant::ScoredPoint) -> SearchResult {
        let payload = &point.payload;
        let entity_type = Self::payload_string(payload, "entity_type");
        let entity_id = Self::payload_string(payload, "entity_id");
        let text = Self::payload_string(payload, "text");
        let extra = payload.get("extra").and_then(|v| match &v.kind {
            Some(value::Kind::StringValue(raw)) if !raw.is_empty() => {
                serde_json::from_str::<serde_json::Value>(raw).ok()
            }
            _ => None,
        });
        let id = point
            .id
            .and_then(|id| id.point_id_options)
            .map(|opts| match opts {
                PointIdOptions::Uuid(s) => s,
                PointIdOptions::Num(n) => n.to_string(),
            })
            .unwrap_or_default();
        SearchResult {
            id,
            score: point.score,
            payload: EmbedPayload {
                entity_type,
                entity_id,
                text,
                extra,
            },
        }
    }

    /// Executes a prepared search request and maps every hit to a
    /// [`SearchResult`].
    async fn run_search(
        &self,
        request: qdrant_client::qdrant::SearchPoints,
    ) -> crate::Result<Vec<SearchResult>> {
        let response = self.inner.search_points(request).await?;
        Ok(response
            .result
            .into_iter()
            .map(Self::point_to_result)
            .collect())
    }

    /// Searches the collection of `entity_type` for the `limit` nearest
    /// neighbours of `vector`.
    pub async fn search(
        &self,
        vector: &[f32],
        entity_type: &str,
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        let collection_name = Self::collection_name(entity_type);
        self.search_collection(&collection_name, vector, limit)
            .await
    }

    /// Search a specific collection by name.
    pub async fn search_collection(
        &self,
        collection_name: &str,
        vector: &[f32],
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        // `usize` to `u64` is a widening cast on supported targets.
        let request = SearchPointsBuilder::new(collection_name, vector.to_vec(), limit as u64)
            .with_payload(true)
            .build();
        self.run_search(request).await
    }

    /// Like [`Self::search`], but only points matching `filter` are
    /// considered.
    pub async fn search_with_filter(
        &self,
        vector: &[f32],
        entity_type: &str,
        limit: usize,
        filter: Filter,
    ) -> crate::Result<Vec<SearchResult>> {
        let collection_name = Self::collection_name(entity_type);
        // `usize` to `u64` is a widening cast on supported targets.
        let request = SearchPointsBuilder::new(collection_name, vector.to_vec(), limit as u64)
            .with_payload(true)
            .filter(filter)
            .build();
        self.run_search(request).await
    }

    /// Deletes every point in the `entity_type` collection whose `entity_id`
    /// payload field equals `entity_id` (keyword match).
    pub async fn delete_by_filter(&self, entity_type: &str, entity_id: &str) -> crate::Result<()> {
        let collection_name = Self::collection_name(entity_type);
        let filter = Filter {
            must: vec![Condition {
                condition_one_of: Some(ConditionOneOf::Field(FieldCondition {
                    key: "entity_id".to_string(),
                    r#match: Some(Match {
                        match_value: Some(MatchValue::Keyword(entity_id.to_string())),
                    }),
                    ..Default::default()
                })),
            }],
            ..Default::default()
        };
        let delete = DeletePointsBuilder::new(collection_name)
            .points(filter)
            .build();
        self.inner.delete_points(delete).await?;
        Ok(())
    }

    /// Drops the whole collection belonging to `entity_type`.
    pub async fn delete_collection(&self, entity_type: &str) -> crate::Result<()> {
        let name = Self::collection_name(entity_type);
        self.inner.delete_collection(name).await?;
        Ok(())
    }

    /// Ensures the shared `memory` entity collection exists.
    pub async fn ensure_memory_collection(&self, dimensions: u64) -> crate::Result<()> {
        self.ensure_collection("memory", dimensions).await
    }

    /// Ensures the `skill` entity collection exists.
    pub async fn ensure_skill_collection(&self, dimensions: u64) -> crate::Result<()> {
        self.ensure_collection("skill", dimensions).await
    }

    /// Nearest-neighbour search over the `memory` entity collection.
    pub async fn search_memory(
        &self,
        vector: &[f32],
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.search(vector, "memory", limit).await
    }

    /// Nearest-neighbour search over the `skill` entity collection.
    pub async fn search_skill(
        &self,
        vector: &[f32],
        limit: usize,
    ) -> crate::Result<Vec<SearchResult>> {
        self.search(vector, "skill", limit).await
    }
}

View File

@ -1,107 +0,0 @@
use qdrant_client::qdrant::Filter;
use super::client::SearchResult;
/// Vector search operations for Qdrant-backed entity retrieval.
impl super::EmbedService {
pub async fn search_issues(
&self,
query: &str,
limit: usize,
) -> crate::Result<Vec<SearchResult>> {
self.client
.search(query, "issue", &self.model_name, limit)
.await
}
pub async fn search_repos(
&self,
query: &str,
limit: usize,
) -> crate::Result<Vec<SearchResult>> {
self.client
.search(query, "repo", &self.model_name, limit)
.await
}
pub async fn search_issues_filtered(
&self,
query: &str,
limit: usize,
filter: Filter,
) -> crate::Result<Vec<SearchResult>> {
self.client
.search_with_filter(query, "issue", &self.model_name, limit, filter)
.await
}
/// Search repo tags by semantic similarity within a project.
/// Filters by project_id (stored in entity_id) for project isolation.
pub async fn search_tags(
&self,
query: &str,
project_id: &str,
limit: usize,
) -> crate::Result<Vec<SearchResult>> {
let mut results = self
.client
.search(query, "repo_tag", &self.model_name, limit + 1)
.await?;
results.retain(|r| r.payload.entity_id == project_id);
results.truncate(limit);
Ok(results)
}
/// Search skills by semantic similarity within a project.
pub async fn search_skills(
&self,
query: &str,
project_uuid: &str,
limit: usize,
) -> crate::Result<Vec<SearchResult>> {
self.client
.search_skills(query, &self.model_name, project_uuid, limit)
.await
}
/// Search past conversation messages by semantic similarity within a room.
pub async fn search_memories(
&self,
query: &str,
project_name: &str,
room_id: &str,
limit: usize,
) -> crate::Result<Vec<SearchResult>> {
self.client
.search_memories(
query,
&self.model_name,
project_name,
room_id,
limit,
self.dimensions,
)
.await
}
pub async fn search_memories_after_seq(
&self,
query: &str,
project_name: &str,
room_id: &str,
limit: usize,
after_seq: Option<i64>,
) -> crate::Result<Vec<SearchResult>> {
self.client
.search_memories_after_seq(
query,
&self.model_name,
project_name,
room_id,
limit,
self.dimensions,
after_seq,
)
.await
}
}

View File

@ -1,63 +0,0 @@
use thiserror::Error;
/// Error type shared by the agent crate; every variant renders a
/// human-readable message through `thiserror`.
#[derive(Error, Debug)]
pub enum AgentError {
/// Failure rendered with the `openai error:` prefix.
/// NOTE(review): presumably raised by the OpenAI-compatible AI/embedding
/// calls — no constructor is visible here; confirm at the call sites.
#[error("openai error: {0}")]
OpenAi(String),
/// Failure of a Qdrant operation; also the target of the
/// `From<qdrant_client::QdrantError>` conversion.
#[error("qdrant error: {0}")]
Qdrant(String),
/// Catch-all internal failure; database errors (`sea_orm::DbErr`) are
/// converted into this variant.
#[error("internal error: {0}")]
Internal(String),
/// A requested record or resource does not exist.
#[error("not found: {0}")]
NotFound(String),
/// The task exceeded its timeout limit.
#[error("task {task_id} timed out after {seconds}s")]
Timeout { task_id: i64, seconds: u64 },
/// The agent has been rate-limited; retry after the indicated delay.
#[error("rate limited, retry after {retry_after_secs}s")]
RateLimited { retry_after_secs: u64 },
/// A transient error that can be retried.
#[error("retryable error (attempt {attempt}): {message}")]
Retryable { attempt: u32, message: String },
/// The requested tool is not registered in the tool registry.
#[error("tool not found: {tool}")]
ToolNotFound { tool: String },
/// A tool execution failed.
#[error("tool '{tool}' execution failed: {cause}")]
ToolExecutionFailed { tool: String, cause: String },
/// The request contains invalid input.
#[error("invalid input in '{field}': {reason}")]
InvalidInput { field: String, reason: String },
}
/// Convenience alias for results whose error type is [`AgentError`].
pub type Result<T> = std::result::Result<T, AgentError>;
impl From<qdrant_client::QdrantError> for AgentError {
    /// Wraps the Qdrant error's display text in `AgentError::Qdrant`.
    fn from(err: qdrant_client::QdrantError) -> Self {
        let message = err.to_string();
        Self::Qdrant(message)
    }
}
impl From<sea_orm::DbErr> for AgentError {
    /// Wraps the database error's display text in `AgentError::Internal`.
    fn from(err: sea_orm::DbErr) -> Self {
        let message = err.to_string();
        Self::Internal(message)
    }
}
impl From<crate::tool::ToolError> for AgentError {
fn from(e: crate::tool::ToolError) -> Self {
AgentError::ToolExecutionFailed {
tool: String::new(),
cause: e.to_string(),
}
}
}

View File

@ -1,60 +0,0 @@
pub mod agent;
pub mod billing;
pub mod chat;
pub mod client;
pub mod compact;
pub mod embed;
pub mod error;
pub mod model;
pub mod orao;
pub mod perception;
pub mod react;
pub mod skills;
pub mod sync;
pub mod task;
pub mod tokent;
pub mod tool;
pub use billing::{
BillingRecord, BillingResult, check_balance, check_user_balance, initialize_project_billing,
initialize_user_billing, persist_billing_error, record_ai_usage, record_user_ai_usage,
};
pub use chat::{
AgentExecutionProfile, AgentRole, AiContextSenderType, AiRequest, AiStreamChunk, ChatService,
Mention, RoomMessageContext, StreamCallback,
};
pub use client::types::ChatRequestMessage;
pub use client::{AiCallResponse, AiClientConfig, call_with_params, call_with_retry};
pub use compact::{
CompactConfig, CompactLevel, CompactService, CompactSummary, MessageSummary,
RoomCompactContext, RoomCompactRecord,
};
pub use embed::{
EmbedClient, EmbedMemoryInput, EmbedService, QdrantClient, SearchResult, TagEmbedInput,
new_embed_client,
};
pub use error::{AgentError, Result};
pub use orao::{
ActionExecutor, ActionResult, ActionType, ActionVerdict, OraoConfig, OraoExecutor,
OraoExecutorBuilder, OraoOutcome, OraoStep, PerceptionSnapshot, PlannedAction, ReasoningOutput,
RoundRecord, SafetyLevel,
};
pub use perception::{PerceptionService, SkillContext, SkillEntry, ToolCallEvent};
pub use react::{
DEFAULT_SYSTEM_PROMPT, PERSONAL_CONTEXT_PROMPT, ROOM_CONTEXT_PROMPT, ReactConfig, ReactStep,
};
pub use skills::{
BuiltInSkill, SKILL_TEMPLATES, all_skill_slugs, all_skills, get_skill, get_skill_by_tool,
is_built_in_skill, match_skill_by_keyword, skills_by_category,
};
pub use sync::list_accessible_models;
pub use task::TaskService;
pub use tokent::{TokenUsage, resolve_usage};
pub use tool::{
ToolCall, ToolCallRecord, ToolCallRecorder, ToolCallResult, ToolContext, ToolDefinition,
ToolError, ToolExecutor, ToolHandler, ToolParam, ToolRegistry, ToolResult, ToolSchema,
};
#[cfg(feature = "rig")]
pub use agent::RigAgentService;
#[cfg(feature = "rig")]
pub use tool::{RecordingTool, RigToolSet, is_retryable_tool_error};

View File

@ -1,117 +0,0 @@
//! Model capability management — CRUD.
use chrono::Utc;
use db::database::AppDatabase;
use models::agents::CapabilityType;
use models::agents::model_capability;
use sea_orm::*;
use crate::error::AgentError;
/// Request body for creating a capability record for a model version.
#[derive(Debug, Clone, serde::Deserialize, utoipa::ToSchema)]
pub struct CreateModelCapabilityRequest {
/// Identifier of the model version the capability belongs to.
pub model_version_id: i64,
/// Capability name; `create_capability` rejects values that do not parse
/// as a `CapabilityType`.
pub capability: String,
/// Whether the capability is supported; `false` when omitted from the
/// request (serde default).
#[serde(default)]
pub is_supported: bool,
}
/// Request body for updating a capability record; only provided fields are
/// changed.
#[derive(Debug, Clone, serde::Deserialize, utoipa::ToSchema)]
pub struct UpdateModelCapabilityRequest {
/// New support flag; `None` leaves the stored value untouched.
pub is_supported: Option<bool>,
}
/// Serialized view of a stored capability record, built from
/// `model_capability::Model`.
#[derive(Debug, Clone, serde::Serialize, utoipa::ToSchema)]
pub struct ModelCapabilityResponse {
/// Primary key of the capability record.
pub id: i64,
/// Identifier of the model version the capability belongs to.
pub model_version_id: i64,
/// Capability name as stored.
pub capability: String,
/// Whether the capability is supported.
pub is_supported: bool,
/// Creation timestamp of the record.
pub created_at: chrono::DateTime<Utc>,
}
impl From<model_capability::Model> for ModelCapabilityResponse {
    /// Copies the record's fields one-to-one into the response shape.
    fn from(mc: model_capability::Model) -> Self {
        let model_capability::Model {
            id,
            model_version_id,
            capability,
            is_supported,
            created_at,
            ..
        } = mc;
        Self {
            id,
            model_version_id,
            capability,
            is_supported,
            created_at,
        }
    }
}
/// Lists every capability record of `model_version_id`, ordered by
/// capability name ascending.
///
/// # Errors
/// Database failures are converted into `AgentError`.
pub async fn list_capabilities(
    db: &AppDatabase,
    model_version_id: i64,
) -> Result<Vec<ModelCapabilityResponse>, AgentError> {
    let query = model_capability::Entity::find()
        .filter(model_capability::Column::ModelVersionId.eq(model_version_id))
        .order_by_asc(model_capability::Column::Capability);
    let rows = query.all(db).await?;

    let mut responses = Vec::with_capacity(rows.len());
    for row in rows {
        responses.push(ModelCapabilityResponse::from(row));
    }
    Ok(responses)
}
/// Fetches a single capability record by primary key.
///
/// # Errors
/// `AgentError::NotFound` when no record has this `id`; database failures
/// are converted into `AgentError`.
pub async fn get_capability(
    db: &AppDatabase,
    id: i64,
) -> Result<ModelCapabilityResponse, AgentError> {
    let lookup = model_capability::Entity::find_by_id(id).one(db).await?;
    match lookup {
        Some(record) => Ok(ModelCapabilityResponse::from(record)),
        None => Err(AgentError::NotFound(format!(
            "Capability record not found: {}",
            id
        ))),
    }
}
pub async fn create_capability(
db: &AppDatabase,
request: CreateModelCapabilityRequest,
) -> Result<ModelCapabilityResponse, AgentError> {
let _ = request
.capability
.parse::<CapabilityType>()
.map_err(|_| AgentError::InvalidInput {
field: "capability".into(),
reason: "Invalid capability type".into(),
})?;
let now = Utc::now();
let active = model_capability::ActiveModel {
model_version_id: Set(request.model_version_id),
capability: Set(request.capability),
is_supported: Set(request.is_supported),
created_at: Set(now),
..Default::default()
};
let cap = active.insert(db).await?;
Ok(ModelCapabilityResponse::from(cap))
}
/// Applies the provided fields of `request` to the capability record `id`
/// and returns the updated record.
///
/// # Errors
/// `AgentError::NotFound` when no record has this `id`; database failures
/// are converted into `AgentError`.
pub async fn update_capability(
    db: &AppDatabase,
    id: i64,
    request: UpdateModelCapabilityRequest,
) -> Result<ModelCapabilityResponse, AgentError> {
    let Some(existing) = model_capability::Entity::find_by_id(id).one(db).await? else {
        return Err(AgentError::NotFound(format!(
            "Capability record not found: {}",
            id
        )));
    };

    let mut pending: model_capability::ActiveModel = existing.into();
    // Only touch the column when the caller actually supplied a value.
    if let Some(flag) = request.is_supported {
        pending.is_supported = Set(flag);
    }

    let saved = pending.update(db).await?;
    Ok(saved.into())
}
/// Deletes the capability record with primary key `id`.
///
/// NOTE(review): the delete result is discarded, so a non-existent `id` is
/// not reported as an error here (unlike `get_capability`) — confirm this
/// idempotent behaviour is intended.
///
/// # Errors
/// Database failures are converted into `AgentError`.
pub async fn delete_capability(db: &AppDatabase, id: i64) -> Result<(), AgentError> {
    let outcome = model_capability::Entity::delete_by_id(id).exec(db).await;
    match outcome {
        Ok(_) => Ok(()),
        Err(err) => Err(err.into()),
    }
}

View File

@ -1,6 +0,0 @@
pub mod capability;
pub mod model_entry;
pub mod parameter_profile;
pub mod pricing;
pub mod provider;
pub mod version;

Some files were not shown because too many files have changed in this diff Show More