gitdataai/libs/agent/embed/mod.rs
ZhenYi bfdb934443 feat(embed): add chunked embedding, batch memory embed, and tag vectorization support
- chunk_text(): char-boundary-safe text chunking at paragraph/sentence breaks (7000 char limit)
- embed_memories_batch(): groups messages by room, batch-embeds all texts to reduce Qdrant calls
- embed_issue_chunked(): auto-chunks long issue bodies
- embed_skill(): upgraded with auto-chunking via chunk_text
- TagEmbedInput struct for batch tag embedding
- embed_tags_batch() / search_tags() with project isolation
- ensure_collections() now creates embed_repo_tag collection
2026-04-28 13:03:51 +08:00

30 lines
1.1 KiB
Rust

// Submodules that make up the embedding layer.
pub mod client;
pub mod qdrant;
pub mod service;
// Re-exports: these items can be imported from this module directly, without
// naming the submodule that defines them.
pub use client::{EmbedClient, EmbedPayload, EmbedVector, SearchResult};
pub use qdrant::QdrantClient;
pub use service::{EmbedMemoryInput, EmbedService, Embeddable, TagEmbedInput};
pub async fn new_embed_client(config: &config::AppConfig) -> crate::Result<EmbedClient> {
let base_url = config
.get_embed_model_base_url()
.map_err(|e| crate::AgentError::Internal(e.to_string()))?;
let api_key = config
.get_embed_model_api_key()
.map_err(|e| crate::AgentError::Internal(e.to_string()))?;
let qdrant_url = config
.get_qdrant_url()
.map_err(|e| crate::AgentError::Internal(e.to_string()))?;
let qdrant_api_key = config.get_qdrant_api_key();
let openai = rig::providers::openai::Client::builder()
.api_key(&api_key)
.base_url(&base_url)
.build()
.map_err(|e| crate::AgentError::Internal(format!("failed to build rig openai client: {}", e)))?;
let qdrant = QdrantClient::new(&qdrant_url, qdrant_api_key.as_deref()).await?;
Ok(EmbedClient::new(openai, qdrant))
}