Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

integrate quantized data to storages #1311

Merged
merged 59 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
6959df4
integrate quantized data to storages
IvanPleshkov Jan 3, 2023
63691b4
revert gitignore
IvanPleshkov Jan 3, 2023
6abff6d
are you happy clippy
IvanPleshkov Jan 3, 2023
d70caab
quantize in optimizer
IvanPleshkov Jan 4, 2023
6749beb
provide flag
IvanPleshkov Jan 4, 2023
d851e1d
fix segfault
IvanPleshkov Jan 5, 2023
f736ce4
skip quantization flag, update scores
IvanPleshkov Jan 6, 2023
817efd1
use quantization flag
IvanPleshkov Jan 17, 2023
0e4490d
are you happy fmt
IvanPleshkov Jan 17, 2023
dfd69de
use quantization flag
IvanPleshkov Jan 17, 2023
82dc7d2
quantized search test
IvanPleshkov Jan 18, 2023
708c4cc
are you happy fmt
IvanPleshkov Jan 18, 2023
ff4d359
refactor test, refactor scorer choosing
IvanPleshkov Jan 18, 2023
d1892f7
are you happy fmt
IvanPleshkov Jan 18, 2023
59113ab
run quantization on segment builder
IvanPleshkov Jan 18, 2023
9f47e8d
decrease testing parameters
IvanPleshkov Jan 18, 2023
fa3c3a8
simplify segment
IvanPleshkov Jan 19, 2023
2926b0c
update version
IvanPleshkov Jan 19, 2023
1b1d5b1
remove use_quantization flag
IvanPleshkov Jan 19, 2023
5ca3e12
provide quantization config
IvanPleshkov Jan 19, 2023
40a8960
quantization version up
IvanPleshkov Jan 23, 2023
77263c1
euclid dist
IvanPleshkov Jan 25, 2023
ae44533
add euclid test
IvanPleshkov Jan 25, 2023
8c36fa5
saveload
IvanPleshkov Jan 26, 2023
e08fafb
fix initialization bugs
IvanPleshkov Jan 26, 2023
48ff551
quantization lib version up
IvanPleshkov Jan 26, 2023
e4be928
fix arm build
IvanPleshkov Jan 26, 2023
d628c52
refactor scorer selecting
IvanPleshkov Jan 26, 2023
6333fff
quant lib version up
IvanPleshkov Jan 26, 2023
f7e54eb
are you happy fmt
IvanPleshkov Jan 26, 2023
ba65b49
Merge branch 'dev' into integrate-quantization
IvanPleshkov Jan 30, 2023
f36c443
are you happy fmt
IvanPleshkov Jan 30, 2023
6773f03
are you happy clippy
IvanPleshkov Jan 30, 2023
6958d04
add save/load test for simple storage
IvanPleshkov Jan 30, 2023
f796067
add comments
IvanPleshkov Jan 30, 2023
303cf8f
quantiles
IvanPleshkov Feb 8, 2023
b668b96
quantization mmap
IvanPleshkov Feb 8, 2023
970ac92
remove f32
IvanPleshkov Feb 8, 2023
4cd30c3
mmap test
IvanPleshkov Feb 9, 2023
f77fba8
fix mmap slice
IvanPleshkov Feb 9, 2023
265c3e2
fix mmap test
IvanPleshkov Feb 9, 2023
6a04dff
use chunks for quantization storage
IvanPleshkov Feb 9, 2023
d6074f5
Merge branch 'dev' into integrate-quantization
IvanPleshkov Feb 9, 2023
f26c5d5
fix build
IvanPleshkov Feb 9, 2023
b2a7bfc
are you happy fmt
IvanPleshkov Feb 9, 2023
7079de7
update quantization library
IvanPleshkov Feb 9, 2023
03f8dfc
update quantization lib
IvanPleshkov Feb 9, 2023
a6baf83
update quantization lib
IvanPleshkov Feb 9, 2023
e88b238
Merge branch 'dev' into integrate-quantization
IvanPleshkov Feb 10, 2023
e23bba5
integrate api changes
IvanPleshkov Feb 16, 2023
adf633d
are you happy fmt
IvanPleshkov Feb 16, 2023
62d9352
change quantization api
IvanPleshkov Feb 17, 2023
63830a6
additional checks in tests
IvanPleshkov Feb 19, 2023
31ea0d8
update quantization version
IvanPleshkov Feb 27, 2023
39218a8
Merge branch 'dev' into integrate-quantization
IvanPleshkov Feb 27, 2023
ce4e385
fix unit tests
IvanPleshkov Feb 27, 2023
f217f34
add quantization to storage config
IvanPleshkov Feb 28, 2023
86f916f
use quantization for all cardinality search cases
IvanPleshkov Mar 1, 2023
a1c282e
Integrate quantization suggestions 2 (#1520)
generall Mar 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
are you happy clippy
  • Loading branch information
IvanPleshkov committed Jan 3, 2023
commit 6abff6d56b2c3dedfc6a89218009c77c32ce0048
2 changes: 1 addition & 1 deletion lib/segment/src/vector_storage/memmap_vector_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ where
let mmap_store = self.mmap_store.as_ref().unwrap();
if let Some(quantized_data) = &mmap_store.quantized_vectors {
if let Some(deleted_ram) = &mmap_store.deleted_ram {
let query = TMetric::preprocess(&vector).unwrap_or(vector.to_owned());
let query = TMetric::preprocess(vector).unwrap_or_else(|| vector.to_owned());
Some(Box::new(QuantizedRawScorer {
query: quantized_data.encode_query(&query),
quantized_data,
Expand Down
42 changes: 42 additions & 0 deletions lib/segment/src/vector_storage/simple_vector_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use rocksdb::DB;
use serde::{Deserialize, Serialize};

use super::chunked_vectors::ChunkedVectors;
use super::quantized_vector_storage::{EncodedVectors, QuantizedRawScorer};
use super::vector_storage_base::VectorStorage;
use crate::common::rocksdb_wrapper::DatabaseColumnWrapper;
use crate::common::Flusher;
Expand All @@ -30,6 +31,7 @@ pub struct SimpleVectorStorage<TMetric: Metric> {
vectors: ChunkedVectors,
deleted: BitVec,
deleted_count: usize,
quantized_vectors: Option<EncodedVectors>,
db_wrapper: DatabaseColumnWrapper,
}

Expand Down Expand Up @@ -129,6 +131,7 @@ pub fn open_simple_vector_storage(
vectors,
deleted,
deleted_count,
quantized_vectors: None,
db_wrapper,
}))),
Distance::Euclid => Ok(Arc::new(AtomicRefCell::new(SimpleVectorStorage::<
Expand All @@ -139,6 +142,7 @@ pub fn open_simple_vector_storage(
vectors,
deleted,
deleted_count,
quantized_vectors: None,
db_wrapper,
}))),
Distance::Dot => Ok(Arc::new(AtomicRefCell::new(SimpleVectorStorage::<
Expand All @@ -149,6 +153,7 @@ pub fn open_simple_vector_storage(
vectors,
deleted,
deleted_count,
quantized_vectors: None,
db_wrapper,
}))),
}
Expand All @@ -173,6 +178,27 @@ where

Ok(())
}

/// Builds the quantized copy of the stored vectors, if it does not exist yet.
///
/// When `quantized_vectors` is already populated this is a no-op. Otherwise
/// every vector in `self.vectors` (offsets `0..len`) is fed to
/// `EncodedVectors::encode` and the result is stored in `quantized_vectors`.
///
/// # Errors
///
/// Returns a service error with the message "cannot quantize vector data"
/// when the encoder fails; the underlying encoder error is discarded.
// NOTE(review): `dead_code` is silenced presumably because no caller is wired
// in yet within this change — confirm and drop the attribute once it is used.
#[allow(dead_code)]
fn quantize(&mut self) -> OperationResult<()> {
    if self.quantized_vectors.is_none() {
        // Cosine and Dot both map onto the encoder's `Dot` similarity;
        // presumably cosine vectors are already normalized by the metric's
        // preprocessing — TODO confirm.
        let similarity = match TMetric::distance() {
            Distance::Cosine | Distance::Dot => quantization::encoder::SimilarityType::Dot,
            Distance::Euclid => quantization::encoder::SimilarityType::L2,
        };

        let total = self.vectors.len() as u32;
        let encoded = EncodedVectors::encode(
            (0..total).map(|offset| self.vectors.get(offset)),
            // Empty buffer handed to the encoder — presumably the storage it
            // fills with encoded data; verify against the quantization crate.
            Vec::new(),
            similarity,
        )
        .map_err(|_| OperationError::service_error("cannot quantize vector data"))?;

        self.quantized_vectors = Some(encoded);
    }

    Ok(())
}
}

impl<TMetric> VectorStorage for SimpleVectorStorage<TMetric>
Expand Down Expand Up @@ -277,6 +303,22 @@ where
})
}

/// Creates a raw scorer that works on the quantized vectors.
///
/// Returns `None` when this storage holds no quantized data. Otherwise the
/// query is passed through `TMetric::preprocess` (falling back to a copy of
/// `vector` when preprocessing yields `None`), encoded with the quantized
/// data's `encode_query`, and wrapped in a `QuantizedRawScorer` that also
/// borrows this storage's `deleted` flags.
fn quantized_raw_scorer(
    &self,
    vector: &[VectorElementType],
) -> Option<Box<dyn RawScorer + '_>> {
    // Bail out early if quantization has not been built for this storage.
    let quantized_data = self.quantized_vectors.as_ref()?;

    let prepared = TMetric::preprocess(vector).unwrap_or_else(|| vector.to_owned());
    let query = quantized_data.encode_query(&prepared);

    Some(Box::new(QuantizedRawScorer {
        query,
        quantized_data,
        deleted: &self.deleted,
    }))
}

fn score_points(
&self,
vector: &[VectorElementType],
Expand Down