gitdataai/lib/ai/rag/payload.rs

111 lines
3.4 KiB
Rust

use std::collections::HashMap;
use qdrant_client::Payload;
use qdrant_client::qdrant::{
PointId, ScoredPoint, point_id::PointIdOptions, value::Kind,
};
use serde_json::{Map, Value, json};
use uuid::Uuid;
use super::document::{RagDocument, RagSearchHit};
use crate::error::{AiError, AiResult};
/// Payload key holding the session identifier a stored document belongs to.
pub(super) const SESSION_ID_KEY: &str = "session_id";
/// Payload key holding the caller-supplied document identifier.
pub(super) const DOCUMENT_ID_KEY: &str = "document_id";
/// Payload key holding the document's text content.
pub(super) const CONTENT_KEY: &str = "content";
/// Payload key holding the document's metadata value.
pub(super) const METADATA_KEY: &str = "metadata";
/// Derives a numeric point id from a session id and a document id.
///
/// The two ids are joined as `"{session_id}:{document_id}"`, hashed into a
/// version-5 UUID under the DNS namespace, and the first eight bytes of that
/// UUID are read as a big-endian `u64`. The same pair of ids therefore always
/// yields the same point id.
///
/// NOTE(review): because the ids are joined with a bare `:`, pairs such as
/// (`"a:b"`, `"c"`) and (`"a"`, `"b:c"`) produce the same key and thus the
/// same point id — confirm that ids never contain `:` or that this is
/// acceptable.
pub(super) fn point_id(session_id: &str, document_id: &str) -> u64 {
    let name = format!("{session_id}:{document_id}");
    let digest = Uuid::new_v5(&Uuid::NAMESPACE_DNS, name.as_bytes());

    // Keep only the leading 64 bits of the 128-bit UUID.
    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&digest.as_bytes()[..8]);
    u64::from_be_bytes(prefix)
}
/// Builds the Qdrant payload stored alongside a document's vector.
///
/// The payload is a JSON object with four entries: the session id, the
/// document's id, its content, and its metadata, keyed by the module's
/// `*_KEY` constants.
///
/// # Errors
///
/// Returns [`AiError::Config`] carrying the conversion error's message when
/// the JSON value cannot be converted into a [`Payload`].
pub(super) fn document_payload(
    session_id: &str,
    document: &RagDocument,
) -> AiResult<Payload> {
    let body = json!({
        SESSION_ID_KEY: session_id,
        DOCUMENT_ID_KEY: document.id,
        CONTENT_KEY: document.content,
        METADATA_KEY: document.metadata,
    });

    Payload::try_from(body).map_err(|err| AiError::Config(err.to_string()))
}
pub(super) fn hit_from_scored_point(point: ScoredPoint) -> RagSearchHit {
let id = point_id_to_string(point.id);
let mut payload = qdrant_payload_to_json(point.payload);
let session_id = take_string(&mut payload, SESSION_ID_KEY);
let document_id = take_string(&mut payload, DOCUMENT_ID_KEY);
let content = take_string(&mut payload, CONTENT_KEY);
let metadata = payload
.remove(METADATA_KEY)
.and_then(|value| match value {
Value::Object(object) => Some(object.into_iter().collect()),
_ => None,
})
.unwrap_or_default();
RagSearchHit {
id: if document_id.is_empty() {
id
} else {
document_id
},
session_id,
score: point.score,
content,
metadata,
}
}
/// Renders an optional Qdrant point id as a string.
///
/// Numeric ids are formatted in decimal, UUID ids are returned as-is, and a
/// missing id (or an id without any variant set) yields an empty string.
fn point_id_to_string(id: Option<PointId>) -> String {
    id.and_then(|point| point.point_id_options)
        .map(|kind| match kind {
            PointIdOptions::Num(number) => number.to_string(),
            PointIdOptions::Uuid(text) => text,
        })
        .unwrap_or_default()
}
fn qdrant_payload_to_json(
payload: HashMap<String, qdrant_client::qdrant::Value>,
) -> Map<String, Value> {
payload
.into_iter()
.map(|(key, value)| (key, value_to_json(value)))
.collect()
}
/// Recursively converts a Qdrant value into its `serde_json` equivalent.
///
/// A value with no kind set, or an explicit null, becomes JSON null.
/// Scalars map to the matching JSON scalar, structs to JSON objects and lists
/// to JSON arrays, with nested values converted the same way.
fn value_to_json(value: qdrant_client::qdrant::Value) -> Value {
    match value.kind {
        None | Some(Kind::NullValue(_)) => Value::Null,
        Some(Kind::BoolValue(flag)) => Value::Bool(flag),
        Some(Kind::IntegerValue(number)) => Value::from(number),
        // Non-finite doubles have no JSON representation and become null.
        Some(Kind::DoubleValue(number)) => Value::from(number),
        Some(Kind::StringValue(text)) => Value::String(text),
        Some(Kind::ListValue(list)) => {
            Value::Array(list.values.into_iter().map(value_to_json).collect())
        }
        Some(Kind::StructValue(record)) => Value::Object(
            record
                .fields
                .into_iter()
                .map(|(name, field)| (name, value_to_json(field)))
                .collect(),
        ),
    }
}
fn take_string(payload: &mut Map<String, Value>, key: &str) -> String {
payload
.remove(key)
.and_then(|value| value.as_str().map(ToOwned::to_owned))
.unwrap_or_default()
}