Skip to content

Commit

Permalink
Rewrite nested filters again (qdrant#1935)
Browse files Browse the repository at this point in the history
* working nested filters

* rm unused file

* add comment example

* todo

* remove nester checkers

* Box recursive generic Fn types

* Box recursive generic Fn types [2/2]

* Add optional ID tracker to check_payload, remove boxed closure (qdrant#1939)

* Add optional ID tracker to check_payload, remove boxed closure

* Replace some match with or_else

* Some nested filter improvements (qdrant#1940)

* Replace starts_with/substring with strip_prefix

* Transform for-if-let-check-return into any iterator

* Transform for-if-return into any iterator

* Add comment to describe why check_payload has no ID tracker

See: qdrant#1935 (comment)

* Update lib/segment/src/payload_storage/query_checker.rs

Co-authored-by: Luis Cossío <luis.cossio@qdrant.com>

* Update lib/segment/src/payload_storage/query_checker.rs

Co-authored-by: Luis Cossío <luis.cossio@qdrant.com>

* fix clippy

---------

Co-authored-by: timvisee <tim@visee.me>
Co-authored-by: Tim Visée <tim+github@visee.me>
Co-authored-by: Luis Cossío <luis.cossio@qdrant.com>
  • Loading branch information
4 people committed May 22, 2023
1 parent 30a1ee0 commit 2e5e029
Show file tree
Hide file tree
Showing 19 changed files with 436 additions and 1,005 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ mod tests {
use itertools::Itertools;
use parking_lot::RwLock;
use segment::entry::entry_point::SegmentEntry;
use segment::types::{Distance, PayloadSchemaType};
use segment::types::{Distance, PayloadContainer, PayloadSchemaType};
use serde_json::{json, Value};
use tempfile::Builder;

Expand Down
4 changes: 3 additions & 1 deletion lib/collection/src/grouping/aggregator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use itertools::Itertools;
use ordered_float::OrderedFloat;
use segment::data_types::groups::GroupId;
use segment::spaces::tools::{peek_top_largest_iterable, peek_top_smallest_iterable};
use segment::types::{ExtendedPointId, Order, PointIdType, ScoreType, ScoredPoint};
use segment::types::{
ExtendedPointId, Order, PayloadContainer, PointIdType, ScoreType, ScoredPoint,
};
use serde_json::Value;

use super::types::AggregatorError::{self, *};
Expand Down
2 changes: 1 addition & 1 deletion lib/collection/src/operations/payload_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ impl SplitByShard for SetPayload {

#[cfg(test)]
mod tests {
use segment::types::Payload;
use segment::types::{Payload, PayloadContainer};
use serde_json::Value;

use super::*;
Expand Down
2 changes: 1 addition & 1 deletion lib/collection/tests/collection_restore_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use collection::operations::point_ops::{
use collection::operations::types::ScrollRequest;
use collection::operations::CollectionUpdateOperations;
use itertools::Itertools;
use segment::types::{PayloadSelectorExclude, WithPayloadInterface};
use segment::types::{PayloadContainer, PayloadSelectorExclude, WithPayloadInterface};
use serde_json::Value;
use tempfile::Builder;

Expand Down
59 changes: 55 additions & 4 deletions lib/segment/src/index/query_optimization/condition_converter.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
use std::collections::HashSet;

use serde_json::Value;

use crate::common::utils::IndexesMap;
use crate::id_tracker::IdTrackerSS;
use crate::index::field_index::FieldIndex;
use crate::index::query_optimization::optimized_filter::ConditionCheckerFn;
use crate::index::query_optimization::payload_provider::PayloadProvider;
use crate::payload_storage::query_checker::{
check_field_condition, check_is_empty_condition, check_is_null_condition,
check_field_condition, check_is_empty_condition, check_is_null_condition, check_payload,
select_nested_indexes,
};
use crate::types::{
AnyVariants, Condition, FieldCondition, FloatPayloadType, GeoBoundingBox, GeoRadius, Match,
MatchAny, MatchText, MatchValue, PointOffsetType, Range, ValueVariants,
MatchAny, MatchText, MatchValue, OwnedPayloadRef, PayloadContainer, PointOffsetType, Range,
ValueVariants,
};

pub fn condition_converter<'a>(
Expand All @@ -30,7 +34,7 @@ pub fn condition_converter<'a>(
.unwrap_or_else(|| {
Box::new(move |point_id| {
payload_provider.with_payload(point_id, |payload| {
check_field_condition(field_condition, &payload)
check_field_condition(field_condition, &payload, field_indexes)
})
})
}),
Expand All @@ -56,8 +60,55 @@ pub fn condition_converter<'a>(
.collect();
Box::new(move |point_id| segment_ids.contains(&point_id))
}
Condition::Nested(nested) => {
// Select indexes for nested fields. Trim nested part from key, so
// that nested condition can address fields without nested part.

// Example:
// Index for field `nested.field` will be stored under key `nested.field`
// And we have a query:
// {
// "nested": {
// "path": "nested",
// "filter": {
// ...
// "match": {"key": "field", "value": "value"}
// }
// }

// In this case we want to use `nested.field`, but we only have `field` in query.
// Therefore we need to trim `nested` part from key. So that query executor
// can address proper index for nested field.
let nested_path = nested.array_key();

let nested_indexes = select_nested_indexes(&nested_path, field_indexes);

Box::new(move |point_id| {
payload_provider.with_payload(point_id, |payload| {
let field_values = payload.get_value(&nested_path).values();

for value in field_values {
if let Value::Object(object) = value {
let get_payload = || OwnedPayloadRef::from(object);
if check_payload(
Box::new(get_payload),
// None because has_id in nested is not supported. So retrieving
// IDs throug the tracker would always return None.
None,
&nested.nested.filter,
point_id,
&nested_indexes,
) {
// If at least one nested object matches, return true
return true;
}
}
}
false
})
})
}
Condition::Filter(_) => unreachable!(),
Condition::Nested(_) => unreachable!(),
}
}

Expand Down
2 changes: 0 additions & 2 deletions lib/segment/src/index/query_optimization/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
pub mod condition_converter;
pub mod nested_filter;
pub mod nested_optimizer;
pub mod optimized_filter;
pub mod optimizer;
pub mod payload_provider;
223 changes: 0 additions & 223 deletions lib/segment/src/index/query_optimization/nested_filter.rs

This file was deleted.

Loading

0 comments on commit 2e5e029

Please sign in to comment.