Skip to content

Commit

Permalink
[Strict mode] limits for filter and conditions (#5754)
Browse files Browse the repository at this point in the history
* Add limits for filter and conditions

* clippy

* Review remarks + nested condition test

* Fix opnapi specs

* Improve error message by giving info about limits and usage
  • Loading branch information
JojiiOfficial authored and timvisee committed Jan 16, 2025
1 parent 67b4466 commit b870f96
Show file tree
Hide file tree
Showing 9 changed files with 342 additions and 16 deletions.
2 changes: 2 additions & 0 deletions docs/grpc/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -1460,6 +1460,8 @@ Note: 1kB = 1 vector of size 256. |
| read_rate_limit | [uint32](#uint32) | optional | Max number of read operations per minute per replica |
| write_rate_limit | [uint32](#uint32) | optional | Max number of write operations per minute per replica |
| max_collection_payload_size_bytes | [uint64](#uint64) | optional | |
| filter_max_conditions | [uint64](#uint64) | optional | |
| condition_max_size | [uint64](#uint64) | optional | |



Expand Down
14 changes: 14 additions & 0 deletions docs/redoc/master/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -7329,6 +7329,20 @@
"format": "uint",
"minimum": 0,
"nullable": true
},
"filter_max_conditions": {
"description": "Max conditions a filter can have.",
"type": "integer",
"format": "uint",
"minimum": 0,
"nullable": true
},
"condition_max_size": {
"description": "Max size of a condition, eg. items in `MatchAny`.",
"type": "integer",
"format": "uint",
"minimum": 0,
"nullable": true
}
}
},
Expand Down
4 changes: 4 additions & 0 deletions lib/api/src/grpc/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1717,6 +1717,8 @@ impl From<StrictModeConfig> for segment::types::StrictModeConfig {
max_collection_payload_size_bytes: value
.max_collection_payload_size_bytes
.map(|i| i as usize),
filter_max_conditions: value.filter_max_conditions.map(|i| i as usize),
condition_max_size: value.condition_max_size.map(|i| i as usize),
}
}
}
Expand All @@ -1741,6 +1743,8 @@ impl From<segment::types::StrictModeConfig> for StrictModeConfig {
max_collection_payload_size_bytes: value
.max_collection_payload_size_bytes
.map(|i| i as u64),
filter_max_conditions: value.filter_max_conditions.map(|i| i as u64),
condition_max_size: value.condition_max_size.map(|i| i as u64),
}
}
}
Expand Down
8 changes: 5 additions & 3 deletions lib/api/src/grpc/proto/collections.proto
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ message MaxOptimizationThreads {
enum Setting {
Auto = 0;
}

oneof variant {
uint64 value = 1;
Setting setting = 2;
Expand Down Expand Up @@ -271,10 +271,10 @@ message OptimizersConfigDiff {
Interval between forced flushes.
*/
optional uint64 flush_interval_sec = 7;

// Deprecated in favor of `max_optimization_threads`
optional uint64 deprecated_max_optimization_threads = 8;

/*
Max number of threads (jobs) for running optimizations per shard.
Note: each optimization job will also use `max_indexing_threads` threads by itself for index building.
Expand Down Expand Up @@ -340,6 +340,8 @@ message StrictModeConfig {
optional uint32 read_rate_limit = 11; // Max number of read operations per minute per replica
optional uint32 write_rate_limit = 12; // Max number of write operations per minute per replica
optional uint64 max_collection_payload_size_bytes = 13;
optional uint64 filter_max_conditions = 14;
optional uint64 condition_max_size = 15;
}

message CreateCollection {
Expand Down
4 changes: 4 additions & 0 deletions lib/api/src/grpc/qdrant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,10 @@ pub struct StrictModeConfig {
pub write_rate_limit: ::core::option::Option<u32>,
#[prost(uint64, optional, tag = "13")]
pub max_collection_payload_size_bytes: ::core::option::Option<u64>,
#[prost(uint64, optional, tag = "14")]
pub filter_max_conditions: ::core::option::Option<u64>,
#[prost(uint64, optional, tag = "15")]
pub condition_max_size: ::core::option::Option<u64>,
}
#[derive(validator::Validate)]
#[derive(serde::Serialize)]
Expand Down
77 changes: 66 additions & 11 deletions lib/collection/src/operations/verification/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,23 +108,28 @@ pub trait StrictModeVerification {
let check_filter = |filter: Option<&Filter>,
allow_unindexed_filter: Option<bool>|
-> Result<(), CollectionError> {
if let Some(read_filter) = filter {
if allow_unindexed_filter == Some(false) {
if let Some((key, schemas)) = collection.one_unindexed_key(read_filter) {
let possible_schemas_str = schemas
.iter()
.map(|schema| schema.to_string())
.collect::<Vec<_>>()
.join(", ");

return Err(CollectionError::strict_mode(
let Some(filter) = filter else {
return Ok(());
};

// Check for filter indices
if allow_unindexed_filter == Some(false) {
if let Some((key, schemas)) = collection.one_unindexed_key(filter) {
let possible_schemas_str = schemas
.iter()
.map(|schema| schema.to_string())
.collect::<Vec<_>>()
.join(", ");

return Err(CollectionError::strict_mode(
format!("Index required but not found for \"{key}\" of one of the following types: [{possible_schemas_str}]"),
"Create an index for this key or use a different filter.",
));
}
}
}

check_filter_limits(filter, strict_mode_config)?;

Ok(())
};

Expand Down Expand Up @@ -158,6 +163,39 @@ pub trait StrictModeVerification {
}
}

fn check_filter_limits(
filter: &Filter,
strict_mode_config: &StrictModeConfig,
) -> Result<(), CollectionError> {
// Filter condition count limit
if let Some(filter_condition_limit) = strict_mode_config.filter_max_conditions {
let filter_conditions = filter.total_conditions_count();

if !check_custom(|| Some(filter_conditions), Some(filter_condition_limit)) {
return Err(CollectionError::strict_mode(
format!("Filter condition limit reached ({filter_conditions} > {filter_condition_limit})"),
"Reduce the amount of conditions of your filter.",
));
}
}

// Filter condition size limit
if let Some(max_condition_size) = strict_mode_config.condition_max_size {
let input_condition_size = filter.max_condition_input_size();

if !check_custom(|| Some(input_condition_size), Some(max_condition_size)) {
return Err(CollectionError::strict_mode(
format!(
"Condition size limit reached ({input_condition_size} > {max_condition_size})"
),
"Reduce the size of your condition.",
));
}
}

Ok(())
}

pub fn check_timeout(
timeout: usize,
strict_mode_config: &StrictModeConfig,
Expand Down Expand Up @@ -189,6 +227,7 @@ pub(crate) fn check_limit_opt<T: PartialOrd + Display>(
let (Some(limit), Some(value)) = (limit, value) else {
return Ok(());
};

if value > limit {
return Err(CollectionError::strict_mode(
format!("Limit exceeded {value} > {limit} for \"{name}\""),
Expand All @@ -199,6 +238,21 @@ pub(crate) fn check_limit_opt<T: PartialOrd + Display>(
Ok(())
}

pub(crate) fn check_custom<T: PartialOrd>(
value_fn: impl FnOnce() -> Option<T>,
limit: Option<T>,
) -> bool {
let Some(limit) = limit else {
return true;
};

let Some(value) = value_fn() else {
return true;
};

value <= limit
}

impl StrictModeVerification for SearchParams {
async fn check_custom(
&self,
Expand Down Expand Up @@ -412,6 +466,7 @@ mod test {
read_rate_limit: None,
write_rate_limit: None,
max_collection_payload_size_bytes: None,
..Default::default()
};

fixture_collection(&strict_mode_config).await
Expand Down
95 changes: 94 additions & 1 deletion lib/segment/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,14 @@ pub struct StrictModeConfig {
/// Max size of a collections payload storage in bytes
#[serde(skip_serializing_if = "Option::is_none")]
pub max_collection_payload_size_bytes: Option<usize>,

/// Max conditions a filter can have.
#[serde(skip_serializing_if = "Option::is_none")]
pub filter_max_conditions: Option<usize>,

/// Max size of a condition, eg. items in `MatchAny`.
#[serde(skip_serializing_if = "Option::is_none")]
pub condition_max_size: Option<usize>,
}

impl Eq for StrictModeConfig {}
Expand All @@ -746,6 +754,8 @@ impl Hash for StrictModeConfig {
read_rate_limit,
write_rate_limit,
max_collection_payload_size_bytes,
filter_max_conditions,
condition_max_size,
} = self;
(
enabled,
Expand All @@ -759,7 +769,11 @@ impl Hash for StrictModeConfig {
max_collection_vector_size_bytes,
read_rate_limit,
write_rate_limit,
max_collection_payload_size_bytes,
(
max_collection_payload_size_bytes,
filter_max_conditions,
condition_max_size,
),
)
.hash(state);
}
Expand Down Expand Up @@ -1513,6 +1527,22 @@ pub enum AnyVariants {
Integers(IndexSet<IntPayloadType, FnvBuildHasher>),
}

impl AnyVariants {
pub fn len(&self) -> usize {
match self {
AnyVariants::Strings(index_set) => index_set.len(),
AnyVariants::Integers(index_set) => index_set.len(),
}
}

pub fn is_empty(&self) -> bool {
match self {
AnyVariants::Strings(index_set) => index_set.is_empty(),
AnyVariants::Integers(index_set) => index_set.is_empty(),
}
}
}

/// Exact match of the given value
#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
Expand Down Expand Up @@ -2060,6 +2090,19 @@ impl FieldCondition {
}
)
}

fn input_size(&self) -> usize {
if self.r#match.is_none() {
return 0;
}

match self.r#match.as_ref().unwrap() {
Match::Any(match_any) => match_any.any.len(),
Match::Except(match_except) => match_except.except.len(),
Match::Value(_) => 0,
Match::Text(_) => 0,
}
}
}

pub fn validate_field_condition(field_condition: &FieldCondition) -> Result<(), ValidationError> {
Expand Down Expand Up @@ -2222,6 +2265,34 @@ impl Condition {
nested: Nested { key, filter },
})
}

pub fn size_estimation(&self) -> usize {
match self {
Condition::Field(field_condition) => field_condition.input_size(),
Condition::HasId(has_id_condition) => has_id_condition.has_id.len(),
Condition::Filter(filter) => filter.max_condition_input_size(),
Condition::Nested(nested) => nested.filter().max_condition_input_size(),
Condition::IsEmpty(_)
| Condition::IsNull(_)
| Condition::HasVector(_)
| Condition::CustomIdChecker(_) => 0,
}
}

pub fn sub_conditions_count(&self) -> usize {
match self {
Condition::Nested(nested_condition) => {
nested_condition.filter().total_conditions_count()
}
Condition::Filter(filter) => filter.total_conditions_count(),
Condition::Field(_)
| Condition::IsEmpty(_)
| Condition::IsNull(_)
| Condition::CustomIdChecker(_)
| Condition::HasId(_)
| Condition::HasVector(_) => 0,
}
}
}

// The validator crate does not support deriving for enums.
Expand Down Expand Up @@ -2556,6 +2627,28 @@ impl Filter {
.chain(self.should.iter().flatten())
.chain(self.min_should.iter().flat_map(|i| &i.conditions))
}

/// Returns the total amount of conditions of the filter, including all nested filter.
pub fn total_conditions_count(&self) -> usize {
fn count_all_conditions(field: Option<&Vec<Condition>>) -> usize {
field
.map(|i| i.len() + i.iter().map(|j| j.sub_conditions_count()).sum::<usize>())
.unwrap_or(0)
}

count_all_conditions(self.should.as_ref())
+ count_all_conditions(self.min_should.as_ref().map(|i| &i.conditions))
+ count_all_conditions(self.must.as_ref())
+ count_all_conditions(self.must_not.as_ref())
}

/// Returns the size of the largest condition.
pub fn max_condition_input_size(&self) -> usize {
self.iter_conditions()
.map(|i| i.size_estimation())
.max()
.unwrap_or(0)
}
}

#[derive(Debug, Clone, Copy, Eq, PartialEq)]
Expand Down
2 changes: 2 additions & 0 deletions lib/storage/src/content_manager/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ pub fn strict_mode_from_api(value: api::grpc::qdrant::StrictModeConfig) -> Stric
max_collection_payload_size_bytes: value
.max_collection_payload_size_bytes
.map(|i| i as usize),
filter_max_conditions: value.filter_max_conditions.map(|i| i as usize),
condition_max_size: value.condition_max_size.map(|i| i as usize),
}
}

Expand Down
Loading

0 comments on commit b870f96

Please sign in to comment.