diff --git a/lib/segment/src/data_types/named_vectors.rs b/lib/segment/src/data_types/named_vectors.rs
index 9d6eb8149c8..4a1f264ad07 100644
--- a/lib/segment/src/data_types/named_vectors.rs
+++ b/lib/segment/src/data_types/named_vectors.rs
@@ -89,6 +89,15 @@ impl<'a> From for CowVector<'a> {
}
}
+impl<'a> From<Cow<'a, MultiDenseVector>> for CowVector<'a> {
+ fn from(v: Cow<'a, MultiDenseVector>) -> Self {
+ match v {
+ Cow::Borrowed(v) => CowVector::MultiDense(Cow::Borrowed(v)),
+ Cow::Owned(v) => CowVector::MultiDense(Cow::Owned(v)),
+ }
+ }
+}
+
impl<'a> From<&'a SparseVector> for CowVector<'a> {
fn from(v: &'a SparseVector) -> Self {
CowVector::Sparse(Cow::Borrowed(v))
diff --git a/lib/segment/src/data_types/primitive.rs b/lib/segment/src/data_types/primitive.rs
index 811d2b9eee6..e811d9ad814 100644
--- a/lib/segment/src/data_types/primitive.rs
+++ b/lib/segment/src/data_types/primitive.rs
@@ -3,6 +3,7 @@ use std::borrow::Cow;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
+use super::vectors::TypedMultiDenseVector;
use crate::data_types::vectors::{VectorElementType, VectorElementTypeByte};
use crate::spaces::metric::Metric;
use crate::spaces::simple::{CosineMetric, DotProductMetric, EuclidMetric, ManhattanMetric};
@@ -22,6 +23,14 @@ pub trait PrimitiveVectorElement:
) -> Cow<'a, [f32]>;
fn datatype() -> VectorStorageDatatype;
+
+ fn from_float_multivector(
+ multivector: Cow<TypedMultiDenseVector<VectorElementType>>,
+ ) -> Cow<TypedMultiDenseVector<Self>>;
+
+ fn into_float_multivector(
+ multivector: Cow<TypedMultiDenseVector<Self>>,
+ ) -> Cow<TypedMultiDenseVector<VectorElementType>>;
}
impl PrimitiveVectorElement for VectorElementType {
@@ -44,6 +53,18 @@ impl PrimitiveVectorElement for VectorElementType {
fn datatype() -> VectorStorageDatatype {
VectorStorageDatatype::Float32
}
+
+ fn from_float_multivector(
+ multivector: Cow<TypedMultiDenseVector<VectorElementType>>,
+ ) -> Cow<TypedMultiDenseVector<Self>> {
+ multivector
+ }
+
+ fn into_float_multivector(
+ multivector: Cow<TypedMultiDenseVector<Self>>,
+ ) -> Cow<TypedMultiDenseVector<VectorElementType>> {
+ multivector
+ }
}
impl PrimitiveVectorElement for VectorElementTypeByte {
@@ -86,4 +107,30 @@ impl PrimitiveVectorElement for VectorElementTypeByte {
fn datatype() -> VectorStorageDatatype {
VectorStorageDatatype::Uint8
}
+
+ fn from_float_multivector(
+ multivector: Cow<TypedMultiDenseVector<VectorElementType>>,
+ ) -> Cow<TypedMultiDenseVector<Self>> {
+ Cow::Owned(TypedMultiDenseVector::new(
+ multivector
+ .inner_vector
+ .iter()
+ .map(|&x| x as Self)
+ .collect_vec(),
+ multivector.dim,
+ ))
+ }
+
+ fn into_float_multivector(
+ multivector: Cow<TypedMultiDenseVector<Self>>,
+ ) -> Cow<TypedMultiDenseVector<VectorElementType>> {
+ Cow::Owned(TypedMultiDenseVector::new(
+ multivector
+ .inner_vector
+ .iter()
+ .map(|&x| x as VectorElementType)
+ .collect_vec(),
+ multivector.dim,
+ ))
+ }
}
diff --git a/lib/segment/src/data_types/vectors.rs b/lib/segment/src/data_types/vectors.rs
index 6334c2b6903..12f9d107197 100644
--- a/lib/segment/src/data_types/vectors.rs
+++ b/lib/segment/src/data_types/vectors.rs
@@ -1,12 +1,14 @@
use std::collections::HashMap;
use std::slice::ChunksExactMut;
+use itertools::Itertools;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use sparse::common::sparse_vector::SparseVector;
use validator::Validate;
use super::named_vectors::NamedVectors;
+use super::primitive::PrimitiveVectorElement;
use crate::common::operation_error::OperationError;
use crate::common::utils::transpose_map_into_named_vector;
use crate::vector_storage::query::context_query::ContextQuery;
@@ -176,13 +178,15 @@ pub type DenseVector = TypedDenseVector;
/// Type for multi dense vector
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
-pub struct MultiDenseVector {
- pub inner_vector: DenseVector, // vectors are flattened into a single vector
- pub dim: usize, // dimension of each vector
+pub struct TypedMultiDenseVector<T> {
+ pub inner_vector: TypedDenseVector<T>, // vectors are flattened into a single vector
+ pub dim: usize, // dimension of each vector
}
-impl MultiDenseVector {
- pub fn new(flattened_vectors: DenseVector, dim: usize) -> Self {
+pub type MultiDenseVector = TypedMultiDenseVector<VectorElementType>;
+
+impl<T: PrimitiveVectorElement> TypedMultiDenseVector<T> {
+ pub fn new(flattened_vectors: TypedDenseVector<T>, dim: usize) -> Self {
Self {
inner_vector: flattened_vectors,
dim,
@@ -192,17 +196,17 @@ impl MultiDenseVector {
/// MultiDenseVector cannot be empty, so we use a placeholder vector instead
pub fn placeholder(dim: usize) -> Self {
Self {
- inner_vector: vec![1.0; dim],
+ inner_vector: vec![Default::default(); dim],
dim,
}
}
/// Slices the multi vector into the underlying individual vectors
- pub fn multi_vectors(&self) -> impl Iterator<Item = &[VectorElementType]> {
+ pub fn multi_vectors(&self) -> impl Iterator<Item = &[T]> {
self.inner_vector.chunks_exact(self.dim)
}
- pub fn multi_vectors_mut(&mut self) -> ChunksExactMut<'_, VectorElementType> {
+ pub fn multi_vectors_mut(&mut self) -> ChunksExactMut<'_, T> {
self.inner_vector.chunks_exact_mut(self.dim)
}
@@ -211,10 +215,10 @@ impl MultiDenseVector {
}
}
-impl TryFrom<Vec<DenseVector>> for MultiDenseVector {
+impl<T: PrimitiveVectorElement> TryFrom<Vec<TypedDenseVector<T>>> for TypedMultiDenseVector<T> {
type Error = OperationError;
- fn try_from(value: Vec<DenseVector>) -> Result<Self, Self::Error> {
+ fn try_from(value: Vec<TypedDenseVector<T>>) -> Result<Self, Self::Error> {
if value.is_empty() {
return Err(OperationError::ValidationError {
description: "MultiDenseVector cannot be empty".to_string(),
@@ -228,8 +232,8 @@ impl TryFrom> for MultiDenseVector {
received_dim: bad_vec.len(),
})
} else {
- let inner_vector = value.into_iter().flatten().collect();
- let multi_dense = MultiDenseVector { inner_vector, dim };
+ let inner_vector = value.into_iter().flatten().collect_vec();
+ let multi_dense = TypedMultiDenseVector { inner_vector, dim };
Ok(multi_dense)
}
}
diff --git a/lib/segment/src/vector_storage/query_scorer/multi_custom_query_scorer.rs b/lib/segment/src/vector_storage/query_scorer/multi_custom_query_scorer.rs
index fa6811a3fe7..7951e14ba8d 100644
--- a/lib/segment/src/vector_storage/query_scorer/multi_custom_query_scorer.rs
+++ b/lib/segment/src/vector_storage/query_scorer/multi_custom_query_scorer.rs
@@ -12,7 +12,7 @@ use crate::vector_storage::MultiVectorStorage;
pub struct MultiCustomQueryScorer<
'a,
TMetric: Metric,
- TVectorStorage: MultiVectorStorage,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
TQuery: Query,
> {
vector_storage: &'a TVectorStorage,
@@ -23,7 +23,7 @@ pub struct MultiCustomQueryScorer<
impl<
'a,
TMetric: Metric,
- TVectorStorage: MultiVectorStorage,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
TQuery: Query + TransformInto,
> MultiCustomQueryScorer<'a, TMetric, TVectorStorage, TQuery>
{
@@ -50,7 +50,7 @@ impl<
impl<
'a,
TMetric: Metric,
- TVectorStorage: MultiVectorStorage,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
TQuery: Query,
> QueryScorer
for MultiCustomQueryScorer<'a, TMetric, TVectorStorage, TQuery>
diff --git a/lib/segment/src/vector_storage/query_scorer/multi_metric_query_scorer.rs b/lib/segment/src/vector_storage/query_scorer/multi_metric_query_scorer.rs
index 409466941bb..76e1163ad24 100644
--- a/lib/segment/src/vector_storage/query_scorer/multi_metric_query_scorer.rs
+++ b/lib/segment/src/vector_storage/query_scorer/multi_metric_query_scorer.rs
@@ -11,15 +11,18 @@ use crate::vector_storage::MultiVectorStorage;
pub struct MultiMetricQueryScorer<
'a,
TMetric: Metric,
- TVectorStorage: MultiVectorStorage,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
> {
vector_storage: &'a TVectorStorage,
query: MultiDenseVector,
metric: PhantomData,
}
-impl<'a, TMetric: Metric, TVectorStorage: MultiVectorStorage>
- MultiMetricQueryScorer<'a, TMetric, TVectorStorage>
+impl<
+ 'a,
+ TMetric: Metric,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
+ > MultiMetricQueryScorer<'a, TMetric, TVectorStorage>
{
pub fn new(query: MultiDenseVector, vector_storage: &'a TVectorStorage) -> Self {
let slices = query.multi_vectors();
@@ -47,8 +50,11 @@ impl<'a, TMetric: Metric, TVectorStorage: MultiVectorStorage>
}
}
-impl<'a, TMetric: Metric, TVectorStorage: MultiVectorStorage>
- QueryScorer for MultiMetricQueryScorer<'a, TMetric, TVectorStorage>
+impl<
+ 'a,
+ TMetric: Metric,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
+ > QueryScorer for MultiMetricQueryScorer<'a, TMetric, TVectorStorage>
{
#[inline]
fn score_stored(&self, idx: PointOffsetType) -> ScoreType {
diff --git a/lib/segment/src/vector_storage/raw_scorer.rs b/lib/segment/src/vector_storage/raw_scorer.rs
index 72b203db28d..3cf3ebf5014 100644
--- a/lib/segment/src/vector_storage/raw_scorer.rs
+++ b/lib/segment/src/vector_storage/raw_scorer.rs
@@ -414,7 +414,7 @@ where
}))
}
-pub fn raw_multi_scorer_impl<'a, TVectorStorage: MultiVectorStorage>(
+pub fn raw_multi_scorer_impl<'a, TVectorStorage: MultiVectorStorage<VectorElementType>>(
query: QueryVector,
vector_storage: &'a TVectorStorage,
point_deleted: &'a BitSlice,
@@ -451,7 +451,7 @@ pub fn raw_multi_scorer_impl<'a, TVectorStorage: MultiVectorStorage>(
fn new_multi_scorer_with_metric<
'a,
TMetric: Metric + 'a,
- TVectorStorage: MultiVectorStorage,
+ TVectorStorage: MultiVectorStorage<VectorElementType>,
>(
query: QueryVector,
vector_storage: &'a TVectorStorage,
diff --git a/lib/segment/src/vector_storage/simple_multi_dense_vector_storage.rs b/lib/segment/src/vector_storage/simple_multi_dense_vector_storage.rs
index c2364308676..0e2835e45d9 100644
--- a/lib/segment/src/vector_storage/simple_multi_dense_vector_storage.rs
+++ b/lib/segment/src/vector_storage/simple_multi_dense_vector_storage.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
use std::ops::Range;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
@@ -12,23 +13,26 @@ use crate::common::operation_error::{check_process_stopped, OperationError, Oper
use crate::common::rocksdb_wrapper::DatabaseColumnWrapper;
use crate::common::Flusher;
use crate::data_types::named_vectors::CowVector;
-use crate::data_types::vectors::{MultiDenseVector, VectorRef};
+use crate::data_types::primitive::PrimitiveVectorElement;
+use crate::data_types::vectors::{
+ MultiDenseVector, TypedMultiDenseVector, VectorElementType, VectorRef,
+};
use crate::types::{Distance, MultiVectorConfig, VectorStorageDatatype};
use crate::vector_storage::bitvec::bitvec_set_deleted;
use crate::vector_storage::common::StoredRecord;
use crate::vector_storage::{MultiVectorStorage, VectorStorage, VectorStorageEnum};
-type StoredMultiDenseVector = StoredRecord<MultiDenseVector>;
+type StoredMultiDenseVector<T> = StoredRecord<TypedMultiDenseVector<T>>;
/// In-memory vector storage with on-update persistence using `store`
-pub struct SimpleMultiDenseVectorStorage {
+pub struct SimpleMultiDenseVectorStorage<T: PrimitiveVectorElement> {
dim: usize,
distance: Distance,
multi_vector_config: MultiVectorConfig,
/// Keep vectors in memory
- vectors: Vec<MultiDenseVector>,
+ vectors: Vec<TypedMultiDenseVector<T>>,
db_wrapper: DatabaseColumnWrapper,
- update_buffer: StoredMultiDenseVector,
+ update_buffer: StoredMultiDenseVector<T>,
/// BitVec for deleted flags. Grows dynamically upto last set flag.
deleted: BitVec,
/// Current number of deleted vectors.
@@ -44,14 +48,14 @@ pub fn open_simple_multi_dense_vector_storage(
multi_vector_config: MultiVectorConfig,
stopped: &AtomicBool,
) -> OperationResult>> {
- let mut vectors: Vec<MultiDenseVector> = vec![];
+ let mut vectors: Vec<TypedMultiDenseVector<VectorElementType>> = vec![];
let (mut deleted, mut deleted_count) = (BitVec::new(), 0);
let db_wrapper = DatabaseColumnWrapper::new(database, database_column_name);
db_wrapper.lock_db().iter()?;
for (key, value) in db_wrapper.lock_db().iter()? {
let point_id: PointOffsetType = bincode::deserialize(&key)
.map_err(|_| OperationError::service_error("cannot deserialize point id from db"))?;
- let stored_record: StoredMultiDenseVector = bincode::deserialize(&value)
+ let stored_record: StoredMultiDenseVector<VectorElementType> = bincode::deserialize(&value)
.map_err(|_| OperationError::service_error("cannot deserialize record from db"))?;
// Propagate deleted flag
@@ -61,7 +65,7 @@ pub fn open_simple_multi_dense_vector_storage(
}
let point_id_usize = point_id as usize;
if point_id_usize >= vectors.len() {
- vectors.resize(point_id_usize + 1, MultiDenseVector::placeholder(dim));
+ vectors.resize(point_id_usize + 1, TypedMultiDenseVector::placeholder(dim));
}
vectors[point_id_usize] = stored_record.vector;
@@ -77,7 +81,7 @@ pub fn open_simple_multi_dense_vector_storage(
db_wrapper,
update_buffer: StoredMultiDenseVector {
deleted: false,
- vector: MultiDenseVector::placeholder(dim),
+ vector: TypedMultiDenseVector::placeholder(dim),
},
deleted,
deleted_count,
@@ -85,7 +89,7 @@ pub fn open_simple_multi_dense_vector_storage(
)))
}
-impl SimpleMultiDenseVectorStorage {
+impl<T: PrimitiveVectorElement> SimpleMultiDenseVectorStorage<T> {
/// Set deleted flag for given key. Returns previous deleted state.
#[inline]
fn set_deleted(&mut self, key: PointOffsetType, deleted: bool) -> bool {
@@ -107,7 +111,7 @@ impl SimpleMultiDenseVectorStorage {
&mut self,
key: PointOffsetType,
deleted: bool,
- vector: Option<MultiDenseVector>,
+ vector: Option<TypedMultiDenseVector<T>>,
) -> OperationResult<()> {
// Write vector state to buffer record
let record = &mut self.update_buffer;
@@ -126,8 +130,8 @@ impl SimpleMultiDenseVectorStorage {
}
}
-impl MultiVectorStorage for SimpleMultiDenseVectorStorage {
- fn get_multi(&self, key: PointOffsetType) -> &MultiDenseVector {
+impl<T: PrimitiveVectorElement> MultiVectorStorage<T> for SimpleMultiDenseVectorStorage<T> {
+ fn get_multi(&self, key: PointOffsetType) -> &TypedMultiDenseVector<T> {
self.vectors.get(key as usize).expect("vector not found")
}
@@ -136,7 +140,7 @@ impl MultiVectorStorage for SimpleMultiDenseVectorStorage {
}
}
-impl VectorStorage for SimpleMultiDenseVectorStorage {
+impl<T: PrimitiveVectorElement> VectorStorage for SimpleMultiDenseVectorStorage<T> {
fn vector_dim(&self) -> usize {
self.dim
}
@@ -159,16 +163,17 @@ impl VectorStorage for SimpleMultiDenseVectorStorage {
fn get_vector(&self, key: PointOffsetType) -> CowVector {
let multi_dense_vector = self.vectors.get(key as usize).expect("vector not found");
+ let multi_dense_vector = T::into_float_multivector(Cow::Borrowed(multi_dense_vector));
CowVector::from(multi_dense_vector)
}
fn insert_vector(&mut self, key: PointOffsetType, vector: VectorRef) -> OperationResult<()> {
let vector: &MultiDenseVector = vector.try_into()?;
- let multi_vector = vector.clone();
+ let multi_vector = T::from_float_multivector(Cow::Borrowed(vector)).into_owned();
let key_usize = key as usize;
if key_usize >= self.vectors.len() {
self.vectors
- .resize(key_usize + 1, MultiDenseVector::placeholder(self.dim));
+ .resize(key_usize + 1, TypedMultiDenseVector::placeholder(self.dim));
}
self.vectors[key_usize] = multi_vector.clone();
self.set_deleted(key, false);
@@ -187,8 +192,10 @@ impl VectorStorage for SimpleMultiDenseVectorStorage {
check_process_stopped(stopped)?;
// Do not perform preprocessing - vectors should be already processed
let other_vector = other.get_vector(point_id);
- let other_vector: &MultiDenseVector = other_vector.as_vec_ref().try_into()?;
- let other_multi_vector = other_vector.clone();
+ let other_vector: &TypedMultiDenseVector<VectorElementType> =
+ other_vector.as_vec_ref().try_into()?;
+ let other_multi_vector =
+ T::from_float_multivector(Cow::Borrowed(other_vector)).into_owned();
let other_deleted = other.is_deleted_vector(point_id);
self.vectors.push(other_multi_vector.clone());
let new_id = self.vectors.len() as PointOffsetType - 1;
diff --git a/lib/segment/src/vector_storage/vector_storage_base.rs b/lib/segment/src/vector_storage/vector_storage_base.rs
index 64625953277..bc3b32053c4 100644
--- a/lib/segment/src/vector_storage/vector_storage_base.rs
+++ b/lib/segment/src/vector_storage/vector_storage_base.rs
@@ -13,7 +13,7 @@ use crate::common::Flusher;
use crate::data_types::named_vectors::CowVector;
use crate::data_types::primitive::PrimitiveVectorElement;
use crate::data_types::vectors::{
- MultiDenseVector, VectorElementType, VectorElementTypeByte, VectorRef,
+ TypedMultiDenseVector, VectorElementType, VectorElementTypeByte, VectorRef,
};
use crate::types::{Distance, MultiVectorConfig, VectorStorageDatatype};
use crate::vector_storage::dense::appendable_mmap_dense_vector_storage::AppendableMmapDenseVectorStorage;
@@ -110,8 +110,8 @@ pub trait SparseVectorStorage: VectorStorage {
fn get_sparse(&self, key: PointOffsetType) -> OperationResult;
}
-pub trait MultiVectorStorage: VectorStorage {
- fn get_multi(&self, key: PointOffsetType) -> &MultiDenseVector;
+pub trait MultiVectorStorage<T: PrimitiveVectorElement>: VectorStorage {
+ fn get_multi(&self, key: PointOffsetType) -> &TypedMultiDenseVector<T>;
fn multi_vector_config(&self) -> &MultiVectorConfig;
}
@@ -123,7 +123,7 @@ pub enum VectorStorageEnum {
DenseAppendableMemmap(Box>),
DenseAppendableMemmapByte(Box>),
SparseSimple(SimpleSparseVectorStorage),
- MultiDenseSimple(SimpleMultiDenseVectorStorage),
+ MultiDenseSimple(SimpleMultiDenseVectorStorage<VectorElementType>),
}
impl VectorStorage for VectorStorageEnum {