233 lines
6.2 KiB
Rust
233 lines
6.2 KiB
Rust
use async_trait::async_trait;
|
|
use qdrant_client::qdrant::Filter;
|
|
use sea_orm::DatabaseConnection;
|
|
use std::sync::Arc;
|
|
|
|
use super::client::{EmbedClient, EmbedPayload, EmbedVector, SearchResult};
|
|
|
|
#[async_trait]
|
|
pub trait Embeddable {
|
|
fn entity_type(&self) -> &'static str;
|
|
fn to_text(&self) -> String;
|
|
fn entity_id(&self) -> String;
|
|
}
|
|
|
|
pub struct EmbedService {
|
|
client: Arc<EmbedClient>,
|
|
db: DatabaseConnection,
|
|
model_name: String,
|
|
dimensions: u64,
|
|
}
|
|
|
|
impl EmbedService {
|
|
pub fn new(
|
|
client: EmbedClient,
|
|
db: DatabaseConnection,
|
|
model_name: String,
|
|
dimensions: u64,
|
|
) -> Self {
|
|
Self {
|
|
client: Arc::new(client),
|
|
db,
|
|
model_name,
|
|
dimensions,
|
|
}
|
|
}
|
|
|
|
pub async fn embed_issue(
|
|
&self,
|
|
id: &str,
|
|
title: &str,
|
|
body: Option<&str>,
|
|
) -> crate::Result<()> {
|
|
let text = match body {
|
|
Some(b) if !b.is_empty() => format!("{}\n\n{}", title, b),
|
|
_ => title.to_string(),
|
|
};
|
|
|
|
let vector = self.client.embed_text(&text, &self.model_name).await?;
|
|
|
|
let point = EmbedVector {
|
|
id: id.to_string(),
|
|
vector,
|
|
payload: EmbedPayload {
|
|
entity_type: "issue".to_string(),
|
|
entity_id: id.to_string(),
|
|
text,
|
|
extra: None,
|
|
},
|
|
};
|
|
|
|
self.client.upsert(vec![point]).await
|
|
}
|
|
|
|
pub async fn embed_repo(
|
|
&self,
|
|
id: &str,
|
|
name: &str,
|
|
description: Option<&str>,
|
|
) -> crate::Result<()> {
|
|
let text = match description {
|
|
Some(d) if !d.is_empty() => format!("{}: {}", name, d),
|
|
_ => name.to_string(),
|
|
};
|
|
|
|
let vector = self.client.embed_text(&text, &self.model_name).await?;
|
|
|
|
let point = EmbedVector {
|
|
id: id.to_string(),
|
|
vector,
|
|
payload: EmbedPayload {
|
|
entity_type: "repo".to_string(),
|
|
entity_id: id.to_string(),
|
|
text,
|
|
extra: None,
|
|
},
|
|
};
|
|
|
|
self.client.upsert(vec![point]).await
|
|
}
|
|
|
|
pub async fn embed_issues<T: Embeddable + Send + Sync>(
|
|
&self,
|
|
items: Vec<T>,
|
|
) -> crate::Result<()> {
|
|
if items.is_empty() {
|
|
return Ok(());
|
|
}
|
|
|
|
let texts: Vec<String> = items.iter().map(|i| i.to_text()).collect();
|
|
let embeddings = self.client.embed_batch(&texts, &self.model_name).await?;
|
|
|
|
let points: Vec<EmbedVector> = items
|
|
.into_iter()
|
|
.zip(embeddings.into_iter())
|
|
.map(|(item, vector)| EmbedVector {
|
|
id: item.entity_id(),
|
|
vector,
|
|
payload: EmbedPayload {
|
|
entity_type: item.entity_type().to_string(),
|
|
entity_id: item.entity_id(),
|
|
text: item.to_text(),
|
|
extra: None,
|
|
},
|
|
})
|
|
.collect();
|
|
|
|
self.client.upsert(points).await
|
|
}
|
|
|
|
pub async fn search_issues(
|
|
&self,
|
|
query: &str,
|
|
limit: usize,
|
|
) -> crate::Result<Vec<SearchResult>> {
|
|
self.client
|
|
.search(query, "issue", &self.model_name, limit)
|
|
.await
|
|
}
|
|
|
|
pub async fn search_repos(
|
|
&self,
|
|
query: &str,
|
|
limit: usize,
|
|
) -> crate::Result<Vec<SearchResult>> {
|
|
self.client
|
|
.search(query, "repo", &self.model_name, limit)
|
|
.await
|
|
}
|
|
|
|
pub async fn search_issues_filtered(
|
|
&self,
|
|
query: &str,
|
|
limit: usize,
|
|
filter: Filter,
|
|
) -> crate::Result<Vec<SearchResult>> {
|
|
self.client
|
|
.search_with_filter(query, "issue", &self.model_name, limit, filter)
|
|
.await
|
|
}
|
|
|
|
pub async fn delete_issue_embedding(&self, issue_id: &str) -> crate::Result<()> {
|
|
self.client.delete_by_entity_id("issue", issue_id).await
|
|
}
|
|
|
|
pub async fn delete_repo_embedding(&self, repo_id: &str) -> crate::Result<()> {
|
|
self.client.delete_by_entity_id("repo", repo_id).await
|
|
}
|
|
|
|
pub async fn ensure_collections(&self) -> crate::Result<()> {
|
|
self.client
|
|
.ensure_collection("issue", self.dimensions)
|
|
.await?;
|
|
self.client
|
|
.ensure_collection("repo", self.dimensions)
|
|
.await?;
|
|
self.client.ensure_skill_collection(self.dimensions).await?;
|
|
self.client.ensure_memory_collection(self.dimensions).await?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn db(&self) -> &DatabaseConnection {
|
|
&self.db
|
|
}
|
|
|
|
pub fn client(&self) -> &Arc<EmbedClient> {
|
|
&self.client
|
|
}
|
|
|
|
/// Embed a project skill into Qdrant for vector-based semantic search.
|
|
pub async fn embed_skill(
|
|
&self,
|
|
skill_id: i64,
|
|
name: &str,
|
|
description: Option<&str>,
|
|
content: &str,
|
|
project_uuid: &str,
|
|
) -> crate::Result<()> {
|
|
let desc = description.unwrap_or_default();
|
|
let id = skill_id.to_string();
|
|
self.client
|
|
.embed_skill(&id, name, desc, content, project_uuid)
|
|
.await
|
|
}
|
|
|
|
/// Search skills by semantic similarity within a project.
|
|
pub async fn search_skills(
|
|
&self,
|
|
query: &str,
|
|
project_uuid: &str,
|
|
limit: usize,
|
|
) -> crate::Result<Vec<SearchResult>> {
|
|
self.client
|
|
.search_skills(query, &self.model_name, project_uuid, limit)
|
|
.await
|
|
}
|
|
|
|
/// Embed a conversation message into Qdrant as a memory vector.
|
|
pub async fn embed_memory(
|
|
&self,
|
|
message_id: i64,
|
|
text: &str,
|
|
room_id: &str,
|
|
user_id: Option<&str>,
|
|
) -> crate::Result<()> {
|
|
let id = message_id.to_string();
|
|
self.client
|
|
.embed_memory(&id, text, room_id, user_id)
|
|
.await
|
|
}
|
|
|
|
/// Search past conversation messages by semantic similarity within a room.
|
|
pub async fn search_memories(
|
|
&self,
|
|
query: &str,
|
|
room_id: &str,
|
|
limit: usize,
|
|
) -> crate::Result<Vec<SearchResult>> {
|
|
self.client
|
|
.search_memories(query, &self.model_name, room_id, limit)
|
|
.await
|
|
}
|
|
}
|