Skip to content

Commit

Permalink
mmap byte storages support
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanPleshkov committed Apr 17, 2024
1 parent c8a0f8b commit c8a8468
Showing 7 changed files with 169 additions and 38 deletions.
4 changes: 3 additions & 1 deletion lib/segment/src/segment.rs
Original file line number Diff line number Diff line change
@@ -215,7 +215,9 @@ impl Segment {
VectorStorageEnum::DenseSimple(_)
| VectorStorageEnum::DenseSimpleByte(_)
| VectorStorageEnum::DenseMemmap(_)
| VectorStorageEnum::DenseAppendableMemmap(_) => {
| VectorStorageEnum::DenseMemmapByte(_)
| VectorStorageEnum::DenseAppendableMemmap(_)
| VectorStorageEnum::DenseAppendableMemmapByte(_) => {
Vector::from(vec![1.0; dim])
}
VectorStorageEnum::SparseSimple(_) => Vector::from(SparseVector::default()),
47 changes: 33 additions & 14 deletions lib/segment/src/segment_constructor/segment_constructor_base.rs
Original file line number Diff line number Diff line change
@@ -32,8 +32,12 @@ use crate::types::{
Distance, Indexes, PayloadStorageType, SegmentConfig, SegmentState, SegmentType, SeqNumberType,
VectorStorageDatatype, VectorStorageType,
};
use crate::vector_storage::dense::appendable_mmap_dense_vector_storage::open_appendable_memmap_vector_storage;
use crate::vector_storage::dense::memmap_dense_vector_storage::open_memmap_vector_storage;
use crate::vector_storage::dense::appendable_mmap_dense_vector_storage::{
open_appendable_memmap_vector_storage, open_appendable_memmap_vector_storage_byte,
};
use crate::vector_storage::dense::memmap_dense_vector_storage::{
open_memmap_vector_storage, open_memmap_vector_storage_byte,
};
use crate::vector_storage::dense::simple_dense_vector_storage::{
open_simple_dense_byte_vector_storage, open_simple_dense_vector_storage,
};
@@ -122,6 +126,7 @@ fn create_segment(
let vector_index_path = get_vector_index_path(segment_path, vector_name);

// Select suitable vector storage type based on configuration
let storage_element_type = vector_config.datatype.unwrap_or_default();
let vector_storage = match vector_config.storage_type {
// In memory
VectorStorageType::Memory => {
@@ -136,7 +141,6 @@ fn create_segment(
stopped,
)?
} else {
let storage_element_type = vector_config.datatype.unwrap_or_default();
match storage_element_type {
VectorStorageDatatype::Float => open_simple_dense_vector_storage(
database.clone(),
@@ -156,18 +160,33 @@ fn create_segment(
}
}
// Mmap on disk, not appendable
VectorStorageType::Mmap => open_memmap_vector_storage(
&vector_storage_path,
vector_config.size,
vector_config.distance,
)?,
VectorStorageType::Mmap => match storage_element_type {
VectorStorageDatatype::Float => open_memmap_vector_storage(
&vector_storage_path,
vector_config.size,
vector_config.distance,
)?,
VectorStorageDatatype::Uint8 => open_memmap_vector_storage_byte(
&vector_storage_path,
vector_config.size,
vector_config.distance,
)?,
},
// Chunked mmap on disk, appendable
VectorStorageType::ChunkedMmap => open_appendable_memmap_vector_storage(
&vector_storage_path,
vector_config.size,
vector_config.distance,
stopped,
)?,
VectorStorageType::ChunkedMmap => match storage_element_type {
VectorStorageDatatype::Float => open_appendable_memmap_vector_storage(
&vector_storage_path,
vector_config.size,
vector_config.distance,
stopped,
)?,
VectorStorageDatatype::Uint8 => open_appendable_memmap_vector_storage_byte(
&vector_storage_path,
vector_config.size,
vector_config.distance,
stopped,
)?,
},
};

// Warn when number of points between ID tracker and storage differs
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@ use crate::common::operation_error::{check_process_stopped, OperationResult};
use crate::common::Flusher;
use crate::data_types::named_vectors::CowVector;
use crate::data_types::primitive::PrimitiveVectorElement;
use crate::data_types::vectors::{VectorElementType, VectorRef};
use crate::data_types::vectors::{VectorElementType, VectorElementTypeByte, VectorRef};
use crate::types::Distance;
use crate::vector_storage::chunked_mmap_vectors::ChunkedMmapVectors;
use crate::vector_storage::dense::dynamic_mmap_flags::DynamicMmapFlags;
@@ -34,13 +34,42 @@ pub fn open_appendable_memmap_vector_storage(
distance: Distance,
stopped: &AtomicBool,
) -> OperationResult<Arc<AtomicRefCell<VectorStorageEnum>>> {
let storage = open_appendable_memmap_vector_storage_impl::<VectorElementType>(
path, dim, distance, stopped,
)?;

Ok(Arc::new(AtomicRefCell::new(
VectorStorageEnum::DenseAppendableMemmap(Box::new(storage)),
)))
}

/// Opens the appendable memory-mapped dense vector storage at `path` with
/// `VectorElementTypeByte` elements.
///
/// The opened storage is boxed, wrapped in
/// `VectorStorageEnum::DenseAppendableMemmapByte` and returned behind a shared
/// `Arc<AtomicRefCell<_>>`.
///
/// # Errors
///
/// Propagates any error returned by
/// `open_appendable_memmap_vector_storage_impl`.
pub fn open_appendable_memmap_vector_storage_byte(
    path: &Path,
    dim: usize,
    distance: Distance,
    stopped: &AtomicBool,
) -> OperationResult<Arc<AtomicRefCell<VectorStorageEnum>>> {
    let byte_storage = open_appendable_memmap_vector_storage_impl::<VectorElementTypeByte>(
        path, dim, distance, stopped,
    )?;
    let wrapped = VectorStorageEnum::DenseAppendableMemmapByte(Box::new(byte_storage));

    Ok(Arc::new(AtomicRefCell::new(wrapped)))
}

pub fn open_appendable_memmap_vector_storage_impl<T: PrimitiveVectorElement>(
path: &Path,
dim: usize,
distance: Distance,
stopped: &AtomicBool,
) -> OperationResult<AppendableMmapDenseVectorStorage<T>> {
create_dir_all(path)?;

let vectors_path = path.join(VECTORS_DIR_PATH);
let deleted_path = path.join(DELETED_DIR_PATH);

let vectors: ChunkedMmapVectors<VectorElementType> =
ChunkedMmapVectors::open(&vectors_path, dim)?;
let vectors = ChunkedMmapVectors::<T>::open(&vectors_path, dim)?;

let num_vectors = vectors.len();

@@ -55,16 +84,12 @@ pub fn open_appendable_memmap_vector_storage(
check_process_stopped(stopped)?;
}

let storage = AppendableMmapDenseVectorStorage {
Ok(AppendableMmapDenseVectorStorage {
vectors,
deleted,
distance,
deleted_count,
};

Ok(Arc::new(AtomicRefCell::new(
VectorStorageEnum::DenseAppendableMemmap(Box::new(storage)),
)))
})
}

impl<T: PrimitiveVectorElement + 'static> AppendableMmapDenseVectorStorage<T> {
@@ -88,8 +113,8 @@ impl<T: PrimitiveVectorElement + 'static> AppendableMmapDenseVectorStorage<T> {
}
}

impl DenseVectorStorage<VectorElementType> for AppendableMmapDenseVectorStorage<VectorElementType> {
fn get_dense(&self, key: PointOffsetType) -> &[VectorElementType] {
impl<T: PrimitiveVectorElement> DenseVectorStorage<T> for AppendableMmapDenseVectorStorage<T> {
    /// Returns the dense vector stored under `key`, borrowed from the
    /// underlying chunked mmap vectors.
    fn get_dense(&self, key: PointOffsetType) -> &[T] {
        let chunked = &self.vectors;
        chunked.get(key)
    }
}
63 changes: 51 additions & 12 deletions lib/segment/src/vector_storage/dense/memmap_dense_vector_storage.rs
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@ use crate::common::operation_error::{check_process_stopped, OperationResult};
use crate::common::Flusher;
use crate::data_types::named_vectors::CowVector;
use crate::data_types::primitive::PrimitiveVectorElement;
use crate::data_types::vectors::{VectorElementType, VectorRef};
use crate::data_types::vectors::{VectorElementType, VectorElementTypeByte, VectorRef};
use crate::types::Distance;
use crate::vector_storage::common::get_async_scorer;
use crate::vector_storage::dense::mmap_dense_vectors::MmapDenseVectors;
@@ -41,7 +41,31 @@ pub fn open_memmap_vector_storage(
dim: usize,
distance: Distance,
) -> OperationResult<Arc<AtomicRefCell<VectorStorageEnum>>> {
open_memmap_vector_storage_with_async_io(path, dim, distance, get_async_scorer())
let storage = open_memmap_vector_storage_with_async_io_impl::<VectorElementType>(
path,
dim,
distance,
get_async_scorer(),
)?;
Ok(Arc::new(AtomicRefCell::new(
VectorStorageEnum::DenseMemmap(storage),
)))
}

/// Opens the non-appendable memory-mapped dense vector storage at `path` with
/// `VectorElementTypeByte` elements.
///
/// Whether async IO is used is decided by `get_async_scorer()`. The opened
/// storage is wrapped in `VectorStorageEnum::DenseMemmapByte` and returned
/// behind a shared `Arc<AtomicRefCell<_>>`.
///
/// # Errors
///
/// Propagates any error returned by
/// `open_memmap_vector_storage_with_async_io_impl`.
pub fn open_memmap_vector_storage_byte(
    path: &Path,
    dim: usize,
    distance: Distance,
) -> OperationResult<Arc<AtomicRefCell<VectorStorageEnum>>> {
    let with_async_io = get_async_scorer();
    let byte_storage = open_memmap_vector_storage_with_async_io_impl::<VectorElementTypeByte>(
        path,
        dim,
        distance,
        with_async_io,
    )?;
    let wrapped = VectorStorageEnum::DenseMemmapByte(byte_storage);

    Ok(Arc::new(AtomicRefCell::new(wrapped)))
}

pub fn open_memmap_vector_storage_with_async_io(
@@ -50,20 +74,35 @@ pub fn open_memmap_vector_storage_with_async_io(
distance: Distance,
with_async_io: bool,
) -> OperationResult<Arc<AtomicRefCell<VectorStorageEnum>>> {
// This entry point opens the float (`VectorElementType`) storage and must keep
// returning `VectorStorageEnum::DenseMemmap`, as it did before the generic
// `_impl` helper was extracted. Byte storage has its own dedicated opener,
// `open_memmap_vector_storage_byte`.
let storage = open_memmap_vector_storage_with_async_io_impl::<VectorElementType>(
    path,
    dim,
    distance,
    with_async_io,
)?;
Ok(Arc::new(AtomicRefCell::new(
    VectorStorageEnum::DenseMemmap(storage),
)))
}

fn open_memmap_vector_storage_with_async_io_impl<T: PrimitiveVectorElement>(
path: &Path,
dim: usize,
distance: Distance,
with_async_io: bool,
) -> OperationResult<Box<MemmapDenseVectorStorage<T>>> {
create_dir_all(path)?;

let vectors_path = path.join(VECTORS_PATH);
let deleted_path = path.join(DELETED_PATH);
let mmap_store = MmapDenseVectors::open(&vectors_path, &deleted_path, dim, with_async_io)?;

Ok(Arc::new(AtomicRefCell::new(
VectorStorageEnum::DenseMemmap(Box::new(MemmapDenseVectorStorage {
vectors_path,
deleted_path,
mmap_store: Some(mmap_store),
distance,
})),
)))
Ok(Box::new(MemmapDenseVectorStorage {
vectors_path,
deleted_path,
mmap_store: Some(mmap_store),
distance,
}))
}

impl<T: PrimitiveVectorElement> MemmapDenseVectorStorage<T> {
@@ -87,8 +126,8 @@ impl<T: PrimitiveVectorElement> MemmapDenseVectorStorage<T> {
}
}

impl DenseVectorStorage<VectorElementType> for MemmapDenseVectorStorage<VectorElementType> {
fn get_dense(&self, key: PointOffsetType) -> &[VectorElementType] {
impl<T: PrimitiveVectorElement> DenseVectorStorage<T> for MemmapDenseVectorStorage<T> {
    /// Returns the dense vector stored under `key` from the mmap store.
    ///
    /// # Panics
    ///
    /// Panics if `mmap_store` is `None`.
    fn get_dense(&self, key: PointOffsetType) -> &[T] {
        let store = self.mmap_store.as_ref().unwrap();
        store.get_vector(key)
    }
}
6 changes: 6 additions & 0 deletions lib/segment/src/vector_storage/quantized/quantized_vectors.rs
Original file line number Diff line number Diff line change
@@ -117,9 +117,15 @@ impl QuantizedVectors {
VectorStorageEnum::DenseMemmap(v) => {
Self::create_impl(v.as_ref(), quantization_config, path, max_threads, stopped)
}
VectorStorageEnum::DenseMemmapByte(v) => {
Self::create_impl(v.as_ref(), quantization_config, path, max_threads, stopped)
}
VectorStorageEnum::DenseAppendableMemmap(v) => {
Self::create_impl(v.as_ref(), quantization_config, path, max_threads, stopped)
}
VectorStorageEnum::DenseAppendableMemmapByte(v) => {
Self::create_impl(v.as_ref(), quantization_config, path, max_threads, stopped)
}
VectorStorageEnum::SparseSimple(_) => Err(OperationError::WrongSparse),
VectorStorageEnum::MultiDenseSimple(_v) => Err(OperationError::WrongMulti),
}
8 changes: 8 additions & 0 deletions lib/segment/src/vector_storage/raw_scorer.rs
Original file line number Diff line number Diff line change
@@ -136,9 +136,17 @@ pub fn new_stoppable_raw_scorer<'a>(
raw_scorer_impl(query, vs.as_ref(), point_deleted, is_stopped)
}

// TODO(byte_storage): Implement async raw scorer for DenseMemmapByte
VectorStorageEnum::DenseMemmapByte(vs) => {
raw_scorer_byte_impl(query, vs.as_ref(), point_deleted, is_stopped)
}

VectorStorageEnum::DenseAppendableMemmap(vs) => {
raw_scorer_impl(query, vs.as_ref(), point_deleted, is_stopped)
}
VectorStorageEnum::DenseAppendableMemmapByte(vs) => {
raw_scorer_byte_impl(query, vs.as_ref(), point_deleted, is_stopped)
}
VectorStorageEnum::SparseSimple(vs) => {
raw_sparse_scorer_impl(query, vs, point_deleted, is_stopped)
}
Loading
Oops, something went wrong.

0 comments on commit c8a8468

Please sign in to comment.