feat(tag): vectorize repo tags after hook sync with incremental embedding + FC tool
- HookWorker gains optional embed_service field - Captures changed tag names during webhook dispatch, batch-embeds after completion - HookService auto-inits EmbedService from config for standalone git-hook binary - Adds agent dep to git crate (no circular dep) - SSH/HTTP servers no longer call start_worker (dedicated git-hook handles it) - git_tag_search FC tool for agent semantic tag search with project isolation
This commit is contained in:
parent
62727a93a1
commit
2a9ec6d509
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3108,6 +3108,7 @@ name = "git"
|
|||||||
version = "0.2.9"
|
version = "0.2.9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
|
"agent",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
|
|||||||
@ -112,7 +112,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
cfg,
|
cfg,
|
||||||
);
|
);
|
||||||
|
|
||||||
let cancel = hooks.start_worker();
|
let cancel = hooks.start_worker().await;
|
||||||
let cancel_signal = cancel.clone();
|
let cancel_signal = cancel.clone();
|
||||||
|
|
||||||
// 7. Start health/metrics server on a dedicated port
|
// 7. Start health/metrics server on a dedicated port
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
use super::ctx::GitToolCtx;
|
use super::ctx::GitToolCtx;
|
||||||
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
use agent::{ToolDefinition, ToolHandler, ToolParam, ToolRegistry, ToolSchema};
|
||||||
|
use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
async fn git_tag_list_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
async fn git_tag_list_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||||
@ -63,6 +64,56 @@ async fn git_tag_info_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<s
|
|||||||
Ok(tag_to_json(&info))
|
Ok(tag_to_json(&info))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn git_tag_search_exec(ctx: GitToolCtx, args: serde_json::Value) -> Result<serde_json::Value, String> {
|
||||||
|
let p: serde_json::Map<String, serde_json::Value> = serde_json::from_value(args).map_err(|e| e.to_string())?;
|
||||||
|
let project_name = p.get("project_name").and_then(|v| v.as_str()).ok_or("missing project_name")?;
|
||||||
|
let query = p.get("query").and_then(|v| v.as_str()).ok_or("missing query")?;
|
||||||
|
let limit = p.get("limit").and_then(|v| v.as_u64()).unwrap_or(5) as usize;
|
||||||
|
|
||||||
|
// Resolve project_id from project_name for project isolation
|
||||||
|
let db = ctx.ctx.db();
|
||||||
|
let project = models::projects::project::Entity::find()
|
||||||
|
.filter(models::projects::project::Column::Name.eq(project_name))
|
||||||
|
.one(db)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("DB error looking up project '{}': {}", project_name, e))?
|
||||||
|
.ok_or_else(|| format!("project '{}' not found", project_name))?;
|
||||||
|
let project_id = project.id.to_string();
|
||||||
|
|
||||||
|
// Get embed_service from context
|
||||||
|
let embed = ctx.ctx.embed_service()
|
||||||
|
.ok_or_else(|| "EmbedService not available — Qdrant vector search is disabled".to_string())?;
|
||||||
|
|
||||||
|
let results = embed.search_tags(query, &project_id, limit).await
|
||||||
|
.map_err(|e| format!("tag search failed: {}", e))?;
|
||||||
|
|
||||||
|
let json_results: Vec<serde_json::Value> = results.into_iter().map(|r| {
|
||||||
|
let repo_name = r.payload.extra.as_ref()
|
||||||
|
.and_then(|e| e.get("repo_name"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let tag_name = r.payload.extra.as_ref()
|
||||||
|
.and_then(|e| e.get("tag_name"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let description = r.payload.extra.as_ref()
|
||||||
|
.and_then(|e| e.get("description"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
serde_json::json!({
|
||||||
|
"tag_name": tag_name,
|
||||||
|
"repo_name": repo_name,
|
||||||
|
"description": description,
|
||||||
|
"score": r.score,
|
||||||
|
})
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
Ok(serde_json::to_value(json_results).map_err(|e| e.to_string())?)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn register_git_tools(registry: &mut ToolRegistry) {
|
pub fn register_git_tools(registry: &mut ToolRegistry) {
|
||||||
// git_tag_list
|
// git_tag_list
|
||||||
let p = HashMap::from([
|
let p = HashMap::from([
|
||||||
@ -97,4 +148,21 @@ pub fn register_git_tools(registry: &mut ToolRegistry) {
|
|||||||
})
|
})
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// git_tag_search
|
||||||
|
let p = HashMap::from([
|
||||||
|
("project_name".into(), ToolParam { name: "project_name".into(), param_type: "string".into(), description: Some("Project name (slug)".into()), required: true, properties: None, items: None }),
|
||||||
|
("query".into(), ToolParam { name: "query".into(), param_type: "string".into(), description: Some("Semantic search query to find relevant tags".into()), required: true, properties: None, items: None }),
|
||||||
|
("limit".into(), ToolParam { name: "limit".into(), param_type: "integer".into(), description: Some("Maximum number of results (default 5)".into()), required: false, properties: None, items: None }),
|
||||||
|
]);
|
||||||
|
let schema = ToolSchema { schema_type: "object".into(), properties: Some(p), required: Some(vec!["project_name".into(), "query".into()]) };
|
||||||
|
registry.register(
|
||||||
|
ToolDefinition::new("git_tag_search").description("Semantically search tags across repositories in a project. Uses vector search to find tags relevant to the query, with project-level isolation.").parameters(schema),
|
||||||
|
ToolHandler::new(|ctx, args| {
|
||||||
|
let gctx = super::ctx::GitToolCtx::new(ctx);
|
||||||
|
Box::pin(async move {
|
||||||
|
git_tag_search_exec(gctx, args).await.map_err(agent::ToolError::ExecutionError)
|
||||||
|
})
|
||||||
|
}),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
@ -51,5 +51,6 @@ actix-web = { workspace = true }
|
|||||||
hex = "0.4.3"
|
hex = "0.4.3"
|
||||||
reqwest = { workspace = true }
|
reqwest = { workspace = true }
|
||||||
metrics = { workspace = true }
|
metrics = { workspace = true }
|
||||||
|
agent = { workspace = true }
|
||||||
[lints]
|
[lints]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|||||||
@ -11,6 +11,34 @@ pub mod webhook_dispatch;
|
|||||||
pub use pool::{HookWorker, PoolConfig, RedisConsumer};
|
pub use pool::{HookWorker, PoolConfig, RedisConsumer};
|
||||||
pub use pool::types::{HookTask, TaskType};
|
pub use pool::types::{HookTask, TaskType};
|
||||||
|
|
||||||
|
/// Helper to initialize an optional EmbedService from config (graceful degradation).
|
||||||
|
async fn init_embed_service(config: &AppConfig, db: &AppDatabase) -> Option<agent::embed::EmbedService> {
|
||||||
|
match agent::new_embed_client(config).await {
|
||||||
|
Ok(client) => {
|
||||||
|
let model_name = config
|
||||||
|
.get_embed_model_name()
|
||||||
|
.unwrap_or_else(|_| "text-embedding-3-small".into());
|
||||||
|
let dimensions = config
|
||||||
|
.get_embed_model_dimensions()
|
||||||
|
.unwrap_or(1536);
|
||||||
|
let svc = agent::embed::EmbedService::new(
|
||||||
|
client,
|
||||||
|
db.writer().clone(),
|
||||||
|
model_name,
|
||||||
|
dimensions,
|
||||||
|
);
|
||||||
|
// Ensure the repo_tag collection exists
|
||||||
|
let _ = svc.ensure_collections().await;
|
||||||
|
tracing::info!("hook worker: EmbedService initialized for tag embedding");
|
||||||
|
Some(svc)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(error = %e, "hook worker: EmbedService not available — tag embedding disabled");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Hook service that manages the Redis-backed task queue worker.
|
/// Hook service that manages the Redis-backed task queue worker.
|
||||||
/// Multiple gitserver pods can run concurrently — the worker acquires a
|
/// Multiple gitserver pods can run concurrently — the worker acquires a
|
||||||
/// per-repo Redis lock before processing each task.
|
/// per-repo Redis lock before processing each task.
|
||||||
@ -20,6 +48,7 @@ pub struct HookService {
|
|||||||
pub(crate) cache: AppCache,
|
pub(crate) cache: AppCache,
|
||||||
pub(crate) redis_pool: RedisPool,
|
pub(crate) redis_pool: RedisPool,
|
||||||
pub(crate) config: AppConfig,
|
pub(crate) config: AppConfig,
|
||||||
|
pub(crate) embed_service: Option<agent::embed::EmbedService>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HookService {
|
impl HookService {
|
||||||
@ -34,17 +63,31 @@ impl HookService {
|
|||||||
cache,
|
cache,
|
||||||
redis_pool,
|
redis_pool,
|
||||||
config,
|
config,
|
||||||
|
embed_service: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set an externally-initialized EmbedService (e.g. from the web app).
|
||||||
|
pub fn with_embed_service(mut self, svc: agent::embed::EmbedService) -> Self {
|
||||||
|
self.embed_service = Some(svc);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Start the background worker and return a cancellation token.
|
/// Start the background worker and return a cancellation token.
|
||||||
pub fn start_worker(&self) -> CancellationToken {
|
pub async fn start_worker(&self) -> CancellationToken {
|
||||||
|
// Auto-init embed_service if not set and config allows (standalone binaries)
|
||||||
|
let embed = match self.embed_service.clone() {
|
||||||
|
Some(svc) => Some(svc),
|
||||||
|
None => init_embed_service(&self.config, &self.db).await,
|
||||||
|
};
|
||||||
|
|
||||||
let pool_config = PoolConfig::from_env(&self.config);
|
let pool_config = PoolConfig::from_env(&self.config);
|
||||||
pool::start_worker(
|
pool::start_worker(
|
||||||
self.db.clone(),
|
self.db.clone(),
|
||||||
self.cache.clone(),
|
self.cache.clone(),
|
||||||
self.redis_pool.clone(),
|
self.redis_pool.clone(),
|
||||||
pool_config,
|
pool_config,
|
||||||
|
embed,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,6 +19,7 @@ pub fn start_worker(
|
|||||||
cache: AppCache,
|
cache: AppCache,
|
||||||
redis_pool: RedisPool,
|
redis_pool: RedisPool,
|
||||||
config: PoolConfig,
|
config: PoolConfig,
|
||||||
|
embed_service: Option<agent::embed::EmbedService>,
|
||||||
) -> CancellationToken {
|
) -> CancellationToken {
|
||||||
let consumer = RedisConsumer::new(
|
let consumer = RedisConsumer::new(
|
||||||
redis_pool.clone(),
|
redis_pool.clone(),
|
||||||
@ -28,7 +29,7 @@ pub fn start_worker(
|
|||||||
|
|
||||||
let http_client = Arc::new(reqwest::Client::new());
|
let http_client = Arc::new(reqwest::Client::new());
|
||||||
let max_retries = config.redis_max_retries as u32;
|
let max_retries = config.redis_max_retries as u32;
|
||||||
let worker = HookWorker::new(db, cache, consumer, http_client, max_retries);
|
let worker = HookWorker::new(db, cache, consumer, http_client, max_retries, embed_service);
|
||||||
|
|
||||||
let cancel = CancellationToken::new();
|
let cancel = CancellationToken::new();
|
||||||
let cancel_clone = cancel.clone();
|
let cancel_clone = cancel.clone();
|
||||||
|
|||||||
@ -2,10 +2,13 @@ use crate::error::GitError;
|
|||||||
use crate::hook::pool::redis::RedisConsumer;
|
use crate::hook::pool::redis::RedisConsumer;
|
||||||
use crate::hook::pool::types::{HookTask, TaskType};
|
use crate::hook::pool::types::{HookTask, TaskType};
|
||||||
use crate::hook::sync::HookMetaDataSync;
|
use crate::hook::sync::HookMetaDataSync;
|
||||||
|
use agent::TagEmbedInput;
|
||||||
use db::cache::AppCache;
|
use db::cache::AppCache;
|
||||||
use db::database::AppDatabase;
|
use db::database::AppDatabase;
|
||||||
use metrics::counter;
|
use metrics::counter;
|
||||||
|
use models::repos::repo_tag;
|
||||||
use models::EntityTrait;
|
use models::EntityTrait;
|
||||||
|
use sea_orm::{ColumnTrait, QueryFilter};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
@ -20,6 +23,7 @@ pub struct HookWorker {
|
|||||||
consumer: RedisConsumer,
|
consumer: RedisConsumer,
|
||||||
http_client: Arc<reqwest::Client>,
|
http_client: Arc<reqwest::Client>,
|
||||||
max_retries: u32,
|
max_retries: u32,
|
||||||
|
embed_service: Option<agent::embed::EmbedService>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HookWorker {
|
impl HookWorker {
|
||||||
@ -29,6 +33,7 @@ impl HookWorker {
|
|||||||
consumer: RedisConsumer,
|
consumer: RedisConsumer,
|
||||||
http_client: Arc<reqwest::Client>,
|
http_client: Arc<reqwest::Client>,
|
||||||
max_retries: u32,
|
max_retries: u32,
|
||||||
|
embed_service: Option<agent::embed::EmbedService>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
db,
|
db,
|
||||||
@ -36,6 +41,7 @@ impl HookWorker {
|
|||||||
consumer,
|
consumer,
|
||||||
http_client,
|
http_client,
|
||||||
max_retries,
|
max_retries,
|
||||||
|
embed_service,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -277,6 +283,7 @@ impl HookWorker {
|
|||||||
|
|
||||||
// Dispatch tag webhooks and collect handles
|
// Dispatch tag webhooks and collect handles
|
||||||
let mut tag_changes: u64 = 0;
|
let mut tag_changes: u64 = 0;
|
||||||
|
let mut changed_tag_names: Vec<String> = Vec::new();
|
||||||
for (tag, after_oid) in after_tag_tips {
|
for (tag, after_oid) in after_tag_tips {
|
||||||
let before_oid = before_tag_tips
|
let before_oid = before_tag_tips
|
||||||
.iter()
|
.iter()
|
||||||
@ -286,6 +293,7 @@ impl HookWorker {
|
|||||||
let was_updated = before_oid.map(|o| o != after_oid.as_str()).unwrap_or(false);
|
let was_updated = before_oid.map(|o| o != after_oid.as_str()).unwrap_or(false);
|
||||||
if is_new || was_updated {
|
if is_new || was_updated {
|
||||||
tag_changes += 1;
|
tag_changes += 1;
|
||||||
|
changed_tag_names.push(tag.clone());
|
||||||
let before_oid = before_oid.map_or("0", |v| v).to_string();
|
let before_oid = before_oid.map_or("0", |v| v).to_string();
|
||||||
let tag_name = tag.clone();
|
let tag_name = tag.clone();
|
||||||
let h = tokio::spawn({
|
let h = tokio::spawn({
|
||||||
@ -326,6 +334,51 @@ impl HookWorker {
|
|||||||
counter!("hook_sync_branches_changed_total").increment(branch_changes);
|
counter!("hook_sync_branches_changed_total").increment(branch_changes);
|
||||||
counter!("hook_sync_tags_changed_total").increment(tag_changes);
|
counter!("hook_sync_tags_changed_total").increment(tag_changes);
|
||||||
|
|
||||||
|
// Embed changed tags into Qdrant for semantic search (non-blocking, fire-and-forget)
|
||||||
|
if let Some(ref embed) = self.embed_service {
|
||||||
|
if !changed_tag_names.is_empty() {
|
||||||
|
let es = embed.clone();
|
||||||
|
let db = self.db.clone();
|
||||||
|
let repo_uuid = repo_uuid;
|
||||||
|
let repo_name = repo_name.clone();
|
||||||
|
let project_id = project.to_string();
|
||||||
|
let tag_names = changed_tag_names.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let tags = repo_tag::Entity::find()
|
||||||
|
.filter(repo_tag::Column::Repo.eq(repo_uuid))
|
||||||
|
.filter(repo_tag::Column::Name.is_in(tag_names))
|
||||||
|
.all(db.reader())
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match tags {
|
||||||
|
Ok(rows) => {
|
||||||
|
let count = rows.len();
|
||||||
|
let inputs: Vec<TagEmbedInput> = rows
|
||||||
|
.into_iter()
|
||||||
|
.map(|t| TagEmbedInput {
|
||||||
|
repo_id: repo_uuid.to_string(),
|
||||||
|
repo_name: repo_name.clone(),
|
||||||
|
project_id: project_id.clone(),
|
||||||
|
name: t.name,
|
||||||
|
description: t.description,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
if !inputs.is_empty() {
|
||||||
|
if let Err(e) = es.embed_tags_batch(inputs).await {
|
||||||
|
tracing::warn!(error = %e, "failed to embed changed tags");
|
||||||
|
} else {
|
||||||
|
tracing::debug!(count, "embedded changed tags into Qdrant");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(error = %e, "failed to query changed tags for embedding");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -118,14 +118,12 @@ pub async fn run_http(config: AppConfig) -> anyhow::Result<()> {
|
|||||||
let app_cache = app_cache?;
|
let app_cache = app_cache?;
|
||||||
|
|
||||||
let redis_pool = app_cache.redis_pool().clone();
|
let redis_pool = app_cache.redis_pool().clone();
|
||||||
let hook = HookService::new(
|
let _hook = HookService::new(
|
||||||
db.clone(),
|
db.clone(),
|
||||||
app_cache.clone(),
|
app_cache.clone(),
|
||||||
redis_pool.clone(),
|
redis_pool.clone(),
|
||||||
config.clone(),
|
config.clone(),
|
||||||
);
|
);
|
||||||
let _worker_cancel = hook.start_worker();
|
|
||||||
tracing::info!("hook worker started");
|
|
||||||
|
|
||||||
let sync = crate::ssh::ReceiveSyncService::new(redis_pool.clone());
|
let sync = crate::ssh::ReceiveSyncService::new(redis_pool.clone());
|
||||||
|
|
||||||
|
|||||||
@ -279,15 +279,12 @@ pub async fn run_ssh(config: AppConfig) -> anyhow::Result<()> {
|
|||||||
let cache = AppCache::init(&config).await?;
|
let cache = AppCache::init(&config).await?;
|
||||||
let redis_pool = cache.redis_pool().clone();
|
let redis_pool = cache.redis_pool().clone();
|
||||||
|
|
||||||
// Start the hook worker (Redis queue consumer)
|
let _hook = crate::hook::HookService::new(
|
||||||
let hook = crate::hook::HookService::new(
|
|
||||||
db.clone(),
|
db.clone(),
|
||||||
cache.clone(),
|
cache.clone(),
|
||||||
redis_pool.clone(),
|
redis_pool.clone(),
|
||||||
config.clone(),
|
config.clone(),
|
||||||
);
|
);
|
||||||
let _worker_cancel = hook.start_worker();
|
|
||||||
tracing::info!("hook worker started");
|
|
||||||
|
|
||||||
SSHHandle::new(db, config.clone(), cache, redis_pool)
|
SSHHandle::new(db, config.clone(), cache, redis_pool)
|
||||||
.run_ssh()
|
.run_ssh()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user