diff --git a/lib/collection/src/collection_manager/optimizers/vacuum_optimizer.rs b/lib/collection/src/collection_manager/optimizers/vacuum_optimizer.rs index 697ee7ad353..bce0f9edfb6 100644 --- a/lib/collection/src/collection_manager/optimizers/vacuum_optimizer.rs +++ b/lib/collection/src/collection_manager/optimizers/vacuum_optimizer.rs @@ -218,7 +218,7 @@ mod tests { use itertools::Itertools; use parking_lot::RwLock; use segment::entry::entry_point::SegmentEntry; - use segment::types::{Distance, PayloadSchemaType}; + use segment::types::{Distance, PayloadContainer, PayloadSchemaType}; use serde_json::{json, Value}; use tempfile::Builder; diff --git a/lib/collection/src/grouping/aggregator.rs b/lib/collection/src/grouping/aggregator.rs index 78b1076dbd8..5e8748edec9 100644 --- a/lib/collection/src/grouping/aggregator.rs +++ b/lib/collection/src/grouping/aggregator.rs @@ -5,7 +5,9 @@ use itertools::Itertools; use ordered_float::OrderedFloat; use segment::data_types::groups::GroupId; use segment::spaces::tools::{peek_top_largest_iterable, peek_top_smallest_iterable}; -use segment::types::{ExtendedPointId, Order, PointIdType, ScoreType, ScoredPoint}; +use segment::types::{ + ExtendedPointId, Order, PayloadContainer, PointIdType, ScoreType, ScoredPoint, +}; use serde_json::Value; use super::types::AggregatorError::{self, *}; diff --git a/lib/collection/src/operations/payload_ops.rs b/lib/collection/src/operations/payload_ops.rs index 46ebe4e5e12..7164b27eb74 100644 --- a/lib/collection/src/operations/payload_ops.rs +++ b/lib/collection/src/operations/payload_ops.rs @@ -181,7 +181,7 @@ impl SplitByShard for SetPayload { #[cfg(test)] mod tests { - use segment::types::Payload; + use segment::types::{Payload, PayloadContainer}; use serde_json::Value; use super::*; diff --git a/lib/collection/tests/collection_restore_test.rs b/lib/collection/tests/collection_restore_test.rs index 0f7dcffc578..fe8d8fce8f2 100644 --- a/lib/collection/tests/collection_restore_test.rs +++ b/lib/collection/tests/collection_restore_test.rs @@ -4,7 +4,7 @@ use collection::operations::point_ops::{ use collection::operations::types::ScrollRequest; use collection::operations::CollectionUpdateOperations; use itertools::Itertools; -use segment::types::{PayloadSelectorExclude, WithPayloadInterface}; +use segment::types::{PayloadContainer, PayloadSelectorExclude, WithPayloadInterface}; use serde_json::Value; use tempfile::Builder; diff --git a/lib/segment/src/index/query_optimization/condition_converter.rs b/lib/segment/src/index/query_optimization/condition_converter.rs index c01be8d152d..3699e57657e 100644 --- a/lib/segment/src/index/query_optimization/condition_converter.rs +++ b/lib/segment/src/index/query_optimization/condition_converter.rs @@ -1,16 +1,20 @@ use std::collections::HashSet; +use serde_json::Value; + use crate::common::utils::IndexesMap; use crate::id_tracker::IdTrackerSS; use crate::index::field_index::FieldIndex; use crate::index::query_optimization::optimized_filter::ConditionCheckerFn; use crate::index::query_optimization::payload_provider::PayloadProvider; use crate::payload_storage::query_checker::{ - check_field_condition, check_is_empty_condition, check_is_null_condition, + check_field_condition, check_is_empty_condition, check_is_null_condition, check_payload, + select_nested_indexes, }; use crate::types::{ AnyVariants, Condition, FieldCondition, FloatPayloadType, GeoBoundingBox, GeoRadius, Match, - MatchAny, MatchText, MatchValue, PointOffsetType, Range, ValueVariants, + MatchAny, MatchText, MatchValue, OwnedPayloadRef, PayloadContainer, PointOffsetType, Range, + ValueVariants, }; pub fn condition_converter<'a>( @@ -30,7 +34,7 @@ pub fn condition_converter<'a>( .unwrap_or_else(|| { Box::new(move |point_id| { payload_provider.with_payload(point_id, |payload| { - check_field_condition(field_condition, &payload) + check_field_condition(field_condition, &payload, field_indexes) }) }) }), @@ -56,8 +60,55 @@ pub fn condition_converter<'a>( .collect(); Box::new(move |point_id| segment_ids.contains(&point_id)) } + Condition::Nested(nested) => { + // Select indexes for nested fields. Trim nested part from key, so + // that nested condition can address fields without nested part. + + // Example: + // Index for field `nested.field` will be stored under key `nested.field` + // And we have a query: + // { + // "nested": { + // "path": "nested", + // "filter": { + // ... + // "match": {"key": "field", "value": "value"} + // } + // } + + // In this case we want to use `nested.field`, but we only have `field` in query. + // Therefore we need to trim `nested` part from key. So that query executor + // can address proper index for nested field. + let nested_path = nested.array_key(); + + let nested_indexes = select_nested_indexes(&nested_path, field_indexes); + + Box::new(move |point_id| { + payload_provider.with_payload(point_id, |payload| { + let field_values = payload.get_value(&nested_path).values(); + + for value in field_values { + if let Value::Object(object) = value { + let get_payload = || OwnedPayloadRef::from(object); + if check_payload( + Box::new(get_payload), + // None because has_id in nested is not supported. So retrieving + // IDs throug the tracker would always return None. + None, + &nested.nested.filter, + point_id, + &nested_indexes, + ) { + // If at least one nested object matches, return true + return true; + } + } + } + false + }) + }) + } Condition::Filter(_) => unreachable!(), - Condition::Nested(_) => unreachable!(), } } diff --git a/lib/segment/src/index/query_optimization/mod.rs b/lib/segment/src/index/query_optimization/mod.rs index 5d17ea9c90e..be5fc091386 100644 --- a/lib/segment/src/index/query_optimization/mod.rs +++ b/lib/segment/src/index/query_optimization/mod.rs @@ -1,6 +1,4 @@ pub mod condition_converter; -pub mod nested_filter; -pub mod nested_optimizer; pub mod optimized_filter; pub mod optimizer; pub mod payload_provider; diff --git a/lib/segment/src/index/query_optimization/nested_filter.rs b/lib/segment/src/index/query_optimization/nested_filter.rs deleted file mode 100644 index 73b54d69a8a..00000000000 --- a/lib/segment/src/index/query_optimization/nested_filter.rs +++ /dev/null @@ -1,223 +0,0 @@ -use bitvec::prelude::*; - -use crate::common::utils::{IndexesMap, JsonPathPayload}; -use crate::index::query_optimization::payload_provider::PayloadProvider; -use crate::payload_storage::nested_query_checker::{ - check_nested_is_empty_condition, check_nested_is_null_condition, nested_check_field_condition, -}; -use crate::types::{Condition, NestedCondition, PointOffsetType}; - -/// Given a point_id, returns the list of nested indices matching the condition and the total number of nested elements in the payload -type NestedMatchingIndicesFn<'a> = Box BitVec + 'a>; - -/// Apply `point_id` to `nested_checkers` and return the list of indices in the payload matching all conditions -pub fn find_indices_matching_all_conditions( - point_id: PointOffsetType, - nested_checkers: &[NestedMatchingIndicesFn], -) -> BitVec { - nested_checkers - .iter() - .map(|checker| checker(point_id)) - .reduce(|acc: BitVec, x: BitVec| acc & x) - .unwrap_or_default() -} - -/// Apply `point_id` to `nested_checkers` and return the list of indices in the payload matching none of the conditions -pub fn find_indices_matching_none_conditions( - point_id: PointOffsetType, - nested_checkers: &[NestedMatchingIndicesFn], -) -> BitVec { - let combined_mask = find_indices_matching_any_conditions(point_id, nested_checkers); - - debug_assert!(combined_mask.is_some(), "combined_mask should be Some"); - - combined_mask.map(|mask| !mask).unwrap_or_default() -} - -/// Apply `point_id` to `nested_checkers` and return the list of indices in the payload matching any of the conditions -pub fn find_indices_matching_any_conditions( - point_id: PointOffsetType, - nested_checkers: &[NestedMatchingIndicesFn], -) -> Option { - nested_checkers - .iter() - .map(|checker| checker(point_id)) - .reduce(|acc: BitVec, x: BitVec| acc | x) -} - -pub fn nested_conditions_converter<'a>( - conditions: &'a [Condition], - payload_provider: PayloadProvider, - field_indexes: &'a IndexesMap, - nested_path: JsonPathPayload, -) -> Vec> { - conditions - .iter() - .map(|condition| { - nested_condition_converter( - condition, - payload_provider.clone(), - field_indexes, - nested_path.clone(), - ) - }) - .collect() -} - -pub fn nested_condition_converter<'a>( - condition: &'a Condition, - payload_provider: PayloadProvider, - field_indexes: &'a IndexesMap, - nested_path: JsonPathPayload, -) -> NestedMatchingIndicesFn<'a> { - match condition { - Condition::Field(field_condition) => { - // Do not rely on existing indexes for nested fields because - // they are not retaining the structure of the nested fields (flatten vs unflatten) - // We would need specialized nested indexes. - Box::new(move |point_id| { - payload_provider.with_payload(point_id, |payload| { - nested_check_field_condition( - field_condition, - &payload, - &nested_path, - field_indexes, - ) - }) - }) - } - Condition::IsEmpty(is_empty) => Box::new(move |point_id| { - payload_provider.with_payload(point_id, |payload| { - check_nested_is_empty_condition(&nested_path, is_empty, &payload) - }) - }), - Condition::IsNull(is_null) => Box::new(move |point_id| { - payload_provider.with_payload(point_id, |payload| { - check_nested_is_null_condition(&nested_path, is_null, &payload) - }) - }), - Condition::HasId(_) => { - // No support for has_id in nested queries - Box::new(move |_| BitVec::default()) - } - Condition::Nested(nested) => { - Box::new(move |point_id| { - let mut bitvecs = Vec::with_capacity(3); - - // must - let must_matching = check_nested_must( - point_id, - nested, - field_indexes, - payload_provider.clone(), - nested_path.clone(), - ); - if let Some(must_matching) = must_matching { - bitvecs.push(must_matching); - } - - // must_not - let must_not_matching = check_nested_must_not( - point_id, - nested, - field_indexes, - payload_provider.clone(), - &nested_path, - ); - if let Some(must_not_matching) = must_not_matching { - bitvecs.push(must_not_matching); - } - - // should - let should_matching = check_nested_should( - point_id, - nested, - field_indexes, - payload_provider.clone(), - nested_path.clone(), - ); - if let Some(should_matching) = should_matching { - bitvecs.push(should_matching); - } - - // combine all bitvecs - bitvecs - .into_iter() - .reduce(|acc, x| { - debug_assert_eq!(acc.len(), x.len()); - acc & x - }) - .unwrap_or_default() - }) - } - Condition::Filter(_) => unreachable!(), - } -} - -fn check_nested_must( - point_id: PointOffsetType, - nested: &NestedCondition, - field_indexes: &IndexesMap, - payload_provider: PayloadProvider, - nested_path: JsonPathPayload, -) -> Option { - match &nested.filter().must { - None => None, - Some(musts_conditions) => { - let full_path = nested_path.extend(&nested.array_key()); - let nested_checkers = nested_conditions_converter( - musts_conditions, - payload_provider, - field_indexes, - full_path, - ); - let matches = find_indices_matching_all_conditions(point_id, &nested_checkers); - Some(matches) - } - } -} - -fn check_nested_must_not( - point_id: PointOffsetType, - nested: &NestedCondition, - field_indexes: &IndexesMap, - payload_provider: PayloadProvider, - nested_path: &JsonPathPayload, -) -> Option { - match &nested.filter().must_not { - None => None, - Some(musts_not_conditions) => { - let full_path = nested_path.extend(&nested.array_key()); - let matching_indices = nested_conditions_converter( - musts_not_conditions, - payload_provider, - field_indexes, - full_path, - ); - let matches = find_indices_matching_none_conditions(point_id, &matching_indices); - Some(matches) - } - } -} - -fn check_nested_should( - point_id: PointOffsetType, - nested: &NestedCondition, - field_indexes: &IndexesMap, - payload_provider: PayloadProvider, - nested_path: JsonPathPayload, -) -> Option { - match &nested.filter().should { - None => None, - Some(musts_not_conditions) => { - let full_path = nested_path.extend(&nested.array_key()); - let matching_indices = nested_conditions_converter( - musts_not_conditions, - payload_provider, - field_indexes, - full_path, - ); - find_indices_matching_any_conditions(point_id, &matching_indices) - } - } -} diff --git a/lib/segment/src/index/query_optimization/nested_optimizer.rs b/lib/segment/src/index/query_optimization/nested_optimizer.rs deleted file mode 100644 index 44b2131c65a..00000000000 --- a/lib/segment/src/index/query_optimization/nested_optimizer.rs +++ /dev/null @@ -1,91 +0,0 @@ -use crate::common::utils::{IndexesMap, JsonPathPayload}; -use crate::index::field_index::CardinalityEstimation; -use crate::index::query_estimator::{ - combine_must_estimations, combine_should_estimations, invert_estimation, -}; -use crate::index::query_optimization::nested_filter::{ - find_indices_matching_all_conditions, find_indices_matching_any_conditions, - find_indices_matching_none_conditions, nested_conditions_converter, -}; -use crate::index::query_optimization::optimized_filter::OptimizedCondition; -use crate::index::query_optimization::payload_provider::PayloadProvider; -use crate::types::{Condition, PointOffsetType}; - -pub fn optimize_nested_must<'a, F>( - conditions: &'a [Condition], - field_indexes: &'a IndexesMap, - payload_provider: PayloadProvider, - estimator: &F, - total: usize, - nested_path: JsonPathPayload, -) -> (Vec>, CardinalityEstimation) -where - F: Fn(&Condition) -> CardinalityEstimation, -{ - let nested_checker_fns = - nested_conditions_converter(conditions, payload_provider, field_indexes, nested_path); - let estimations: Vec<_> = conditions.iter().map(estimator).collect(); - - let merged = Box::new(move |point_id: PointOffsetType| { - let matches = find_indices_matching_all_conditions(point_id, &nested_checker_fns); - // if any of the nested path is matching for ALL nested condition - matches.count_ones() > 0 - }); - - let estimation = combine_must_estimations(&estimations, total); - (vec![OptimizedCondition::Checker(merged)], estimation) -} - -pub fn optimize_nested_must_not<'a, F>( - conditions: &'a [Condition], - field_indexes: &'a IndexesMap, - payload_provider: PayloadProvider, - estimator: &F, - total: usize, - nested_path: JsonPathPayload, -) -> (Vec>, CardinalityEstimation) -where - F: Fn(&Condition) -> CardinalityEstimation, -{ - let nested_checker_fns = - nested_conditions_converter(conditions, payload_provider, field_indexes, nested_path); - let estimations: Vec<_> = conditions - .iter() - .map(|condition| invert_estimation(&estimator(condition), total)) - .collect(); - - let merged = Box::new(move |point_id: PointOffsetType| { - let not_matching = find_indices_matching_none_conditions(point_id, &nested_checker_fns); - // if they are no nested path not matching ANY nested conditions - not_matching.count_ones() == 0 - }); - - let estimation = combine_must_estimations(&estimations, total); - (vec![OptimizedCondition::Checker(merged)], estimation) -} - -pub fn optimize_nested_should<'a, F>( - conditions: &'a [Condition], - field_indexes: &'a IndexesMap, - payload_provider: PayloadProvider, - estimator: &F, - total: usize, - nested_path: JsonPathPayload, -) -> (Vec>, CardinalityEstimation) -where - F: Fn(&Condition) -> CardinalityEstimation, -{ - let nested_checker_fns = - nested_conditions_converter(conditions, payload_provider, field_indexes, nested_path); - let estimations: Vec<_> = conditions.iter().map(estimator).collect(); - - let merged = Box::new(move |point_id: PointOffsetType| { - let matches = - find_indices_matching_any_conditions(point_id, &nested_checker_fns).unwrap_or_default(); - // if any of the nested path is matching for any nested condition - matches.count_ones() > 0 - }); - - let estimation = combine_should_estimations(&estimations, total); - (vec![OptimizedCondition::Checker(merged)], estimation) -} diff --git a/lib/segment/src/index/query_optimization/optimizer.rs b/lib/segment/src/index/query_optimization/optimizer.rs index f8ac36571d7..c3eaafd8264 100644 --- a/lib/segment/src/index/query_optimization/optimizer.rs +++ b/lib/segment/src/index/query_optimization/optimizer.rs @@ -2,16 +2,13 @@ use std::cmp::Reverse; use itertools::Itertools; -use crate::common::utils::{IndexesMap, JsonPathPayload}; +use crate::common::utils::IndexesMap; use crate::id_tracker::IdTrackerSS; use crate::index::field_index::CardinalityEstimation; use crate::index::query_estimator::{ combine_must_estimations, combine_should_estimations, invert_estimation, }; use crate::index::query_optimization::condition_converter::condition_converter; -use crate::index::query_optimization::nested_optimizer::{ - optimize_nested_must, optimize_nested_must_not, optimize_nested_should, -}; use crate::index::query_optimization::optimized_filter::{OptimizedCondition, OptimizedFilter}; use crate::index::query_optimization::payload_provider::PayloadProvider; use crate::types::{Condition, Filter}; @@ -43,7 +40,6 @@ pub fn optimize_filter<'a, F>( payload_provider: PayloadProvider, estimator: &F, total: usize, - nested_path: Option, ) -> (OptimizedFilter<'a>, CardinalityEstimation) where F: Fn(&Condition) -> CardinalityEstimation, @@ -53,25 +49,14 @@ where let optimized_filter = OptimizedFilter { should: filter.should.as_ref().and_then(|conditions| { if !conditions.is_empty() { - let (optimized_conditions, estimation) = if let Some(np) = &nested_path { - optimize_nested_should( - conditions, - field_indexes, - payload_provider.clone(), - estimator, - total, - np.clone(), - ) - } else { - optimize_should( - conditions, - id_tracker, - field_indexes, - payload_provider.clone(), - estimator, - total, - ) - }; + let (optimized_conditions, estimation) = optimize_should( + conditions, + id_tracker, + field_indexes, + payload_provider.clone(), + estimator, + total, + ); filter_estimations.push(estimation); Some(optimized_conditions) } else { @@ -80,25 +65,14 @@ where }), must: filter.must.as_ref().and_then(|conditions| { if !conditions.is_empty() { - let (optimized_conditions, estimation) = if let Some(np) = &nested_path { - optimize_nested_must( - conditions, - field_indexes, - payload_provider.clone(), - estimator, - total, - np.clone(), - ) - } else { - optimize_must( - conditions, - id_tracker, - field_indexes, - payload_provider.clone(), - estimator, - total, - ) - }; + let (optimized_conditions, estimation) = optimize_must( + conditions, + id_tracker, + field_indexes, + payload_provider.clone(), + estimator, + total, + ); filter_estimations.push(estimation); Some(optimized_conditions) } else { @@ -107,25 +81,14 @@ where }), must_not: filter.must_not.as_ref().and_then(|conditions| { if !conditions.is_empty() { - let (optimized_conditions, estimation) = if let Some(np) = &nested_path { - optimize_nested_must_not( - conditions, - field_indexes, - payload_provider.clone(), - estimator, - total, - np.clone(), - ) - } else { - optimize_must_not( - conditions, - id_tracker, - field_indexes, - payload_provider.clone(), - estimator, - total, - ) - }; + let (optimized_conditions, estimation) = optimize_must_not( + conditions, + id_tracker, + field_indexes, + payload_provider.clone(), + estimator, + total, + ); filter_estimations.push(estimation); Some(optimized_conditions) } else { @@ -154,18 +117,6 @@ where conditions .iter() .map(|condition| match condition { - Condition::Nested(nested_filter) => { - let (optimized_filter, estimation) = optimize_filter( - nested_filter.filter(), - id_tracker, - field_indexes, - payload_provider.clone(), - estimator, - total, - Some(JsonPathPayload::new(nested_filter.array_key())), - ); - (OptimizedCondition::Filter(optimized_filter), estimation) - } Condition::Filter(filter) => { let (optimized_filter, estimation) = optimize_filter( filter, @@ -174,7 +125,6 @@ where payload_provider.clone(), estimator, total, - None, ); (OptimizedCondition::Filter(optimized_filter), estimation) } diff --git a/lib/segment/src/index/struct_filter_context.rs b/lib/segment/src/index/struct_filter_context.rs index 5997a39a33f..a404e3567a9 100644 --- a/lib/segment/src/index/struct_filter_context.rs +++ b/lib/segment/src/index/struct_filter_context.rs @@ -30,7 +30,6 @@ impl<'a> StructFilterContext<'a> { payload_provider, estimator, total, - None, ); Self { optimized_filter } diff --git a/lib/segment/src/index/struct_payload_index.rs b/lib/segment/src/index/struct_payload_index.rs index 61121a3f339..0527ae9f813 100644 --- a/lib/segment/src/index/struct_payload_index.rs +++ b/lib/segment/src/index/struct_payload_index.rs @@ -31,8 +31,8 @@ use crate::payload_storage::{FilterContext, PayloadStorage}; use crate::telemetry::PayloadIndexTelemetry; use crate::types::{ infer_collection_value_type, infer_value_type, Condition, FieldCondition, Filter, - IsEmptyCondition, IsNullCondition, Payload, PayloadField, PayloadFieldSchema, PayloadKeyType, - PayloadKeyTypeRef, PayloadSchemaType, PointOffsetType, + IsEmptyCondition, IsNullCondition, Payload, PayloadContainer, PayloadField, PayloadFieldSchema, + PayloadKeyType, PayloadKeyTypeRef, PayloadSchemaType, PointOffsetType, }; pub const PAYLOAD_FIELD_INDEX_PATH: &str = "fields"; diff --git a/lib/segment/src/payload_storage/in_memory_payload_storage_impl.rs b/lib/segment/src/payload_storage/in_memory_payload_storage_impl.rs index eb6ebc3686a..4cb3897c923 100644 --- a/lib/segment/src/payload_storage/in_memory_payload_storage_impl.rs +++ b/lib/segment/src/payload_storage/in_memory_payload_storage_impl.rs @@ -62,6 +62,7 @@ mod tests { use serde_json::json; use super::*; + use crate::common::utils::IndexesMap; use crate::fixtures::payload_context_fixture::FixtureIdTracker; use crate::payload_storage::query_checker::check_payload; use crate::types::{Condition, FieldCondition, Filter, OwnedPayloadRef}; @@ -102,16 +103,17 @@ mod tests { let payload: RefCell> = RefCell::new(None); check_payload( - || { + Box::new(|| { eprintln!("request payload"); if payload.borrow().is_none() { payload.replace(Some(get_payload().into())); } payload.borrow().as_ref().cloned().unwrap() - }, - &id_tracker, + }), + Some(&id_tracker), &query, 0, + &IndexesMap::new(), ); } diff --git a/lib/segment/src/payload_storage/mod.rs b/lib/segment/src/payload_storage/mod.rs index bdac1ec9743..3247bc2fb8d 100644 --- a/lib/segment/src/payload_storage/mod.rs +++ b/lib/segment/src/payload_storage/mod.rs @@ -1,7 +1,6 @@ pub mod condition_checker; pub mod in_memory_payload_storage; pub mod in_memory_payload_storage_impl; -pub mod nested_query_checker; pub mod on_disk_payload_storage; mod payload_storage_base; pub mod payload_storage_enum; diff --git a/lib/segment/src/payload_storage/nested_query_checker.rs b/lib/segment/src/payload_storage/nested_query_checker.rs deleted file mode 100644 index 6ad4f8bc2fe..00000000000 --- a/lib/segment/src/payload_storage/nested_query_checker.rs +++ /dev/null @@ -1,454 +0,0 @@ -use std::ops::Deref; - -use bitvec::bitvec; -use bitvec::prelude::BitVec; -use serde_json::Value; - -use crate::common::utils::{IndexesMap, JsonPathPayload, MultiValue}; -use crate::payload_storage::condition_checker::ValueChecker; -use crate::types::{ - Condition, FieldCondition, Filter, IsEmptyCondition, IsNullCondition, OwnedPayloadRef, Payload, -}; - -/// Executes condition checks for all `must` conditions of the nester objects. -/// If there is at least one nested object that matches all `must` conditions, returns `true`. -/// If there are no conditions, returns `true`. -/// If there are no nested objects, returns `false`. -fn check_nested_must_conditions(checker: &F, must: &Option>) -> bool -where - F: Fn(&Condition) -> BitVec, -{ - match must { - None => true, - Some(conditions) => conditions - .iter() - .map(checker) - .reduce(|acc, matches| acc & matches) - .map(|matches: BitVec| matches.count_ones() > 0) - .unwrap_or(false), // At least one sub-object must match all conditions - } -} - -fn check_nested_must_not_conditions(checker: &F, must_not: &Option>) -> bool -where - F: Fn(&Condition) -> BitVec, -{ - match must_not { - None => true, - Some(conditions) => { - conditions - .iter() - .map(checker) - .reduce(|acc, matches| acc | matches) - // There is at least one sub-object that does not match any of the conditions - .map(|matches: BitVec| matches.count_zeros() > 0) - .unwrap_or(true) // If there are no sub-objects, then must_not conditions are satisfied - } - } -} - -pub fn check_nested_filter<'a, F>( - nested_path: &JsonPathPayload, - nested_filter: &Filter, - get_payload: F, -) -> bool -where - F: Fn() -> OwnedPayloadRef<'a>, -{ - let nested_checker = |condition: &Condition| match condition { - Condition::Field(field_condition) => nested_check_field_condition( - field_condition, - get_payload().deref(), - nested_path, - &Default::default(), - ), - Condition::IsEmpty(is_empty) => { - check_nested_is_empty_condition(nested_path, is_empty, get_payload().deref()) - } - Condition::IsNull(is_null) => { - check_nested_is_null_condition(nested_path, is_null, get_payload().deref()) - } - Condition::HasId(_) => unreachable!(), // Is there a use case for nested HasId? - Condition::Nested(_) => unreachable!(), // Several layers of nesting are not supported here - Condition::Filter(_) => unreachable!(), - }; - - nested_filter_checker(&nested_checker, nested_filter) -} - -pub fn nested_filter_checker(matching_paths: &F, nested_filter: &Filter) -> bool -where - F: Fn(&Condition) -> BitVec, -{ - check_nested_must_conditions(matching_paths, &nested_filter.must) - && check_nested_must_not_conditions(matching_paths, &nested_filter.must_not) -} - -/// Return element indices matching the condition in the payload -pub fn check_nested_is_empty_condition( - nested_path: &JsonPathPayload, - is_empty: &IsEmptyCondition, - payload: &Payload, -) -> BitVec { - let full_path = nested_path.extend(&is_empty.is_empty.key); - let field_values = payload.get_value(&full_path.path).values(); - let mut result = BitVec::with_capacity(field_values.len()); - for p in field_values { - match p { - Value::Null => result.push(true), - Value::Array(vec) if vec.is_empty() => result.push(true), - _ => result.push(false), - } - } - result -} - -/// Return element indices matching the condition in the payload -pub fn check_nested_is_null_condition( - nested_path: &JsonPathPayload, - is_null: &IsNullCondition, - payload: &Payload, -) -> BitVec { - let full_path = nested_path.extend(&is_null.is_null.key); - let field_values = payload.get_value(&full_path.path); - match field_values { - MultiValue::Single(None) => bitvec![1; 1], - MultiValue::Single(Some(v)) => { - if v.is_null() { - bitvec![1; 1] - } else { - bitvec![0; 1] - } - } - MultiValue::Multiple(multiple_values) => { - let mut paths = BitVec::with_capacity(multiple_values.len()); - for p in multiple_values { - let check_res = match p { - Value::Null => true, - Value::Array(vec) => vec.iter().any(|val| val.is_null()), - _ => false, - }; - paths.push(check_res); - } - paths - } - } -} - -/// Return indexes of the elements matching the condition in the payload values -pub fn nested_check_field_condition( - field_condition: &FieldCondition, - payload: &Payload, - nested_path: &JsonPathPayload, - field_indexes: &IndexesMap, -) -> BitVec { - let full_path = nested_path.extend(&field_condition.key); - let field_values = payload.get_value(&full_path.path).values(); - let mut result = BitVec::with_capacity(field_values.len()); - - let field_indexes = field_indexes.get(&full_path.path); - - for p in field_values { - // This covers a case, when a field index affects the result of the condition. - // Only required in the nested case, - // because non-nested payload is checked by the index directly. - let mut index_check_res = None; - if let Some(field_indexes) = field_indexes { - for index in field_indexes { - index_check_res = index.check_condition(field_condition, p); - if index_check_res.is_some() { - break; - } - } - } - // Fallback to regular condition check if index-aware check did not return a result - let res = index_check_res.unwrap_or_else(|| field_condition.check(p)); - result.push(res); - } - result -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use atomic_refcell::AtomicRefCell; - use serde_json::json; - use tempfile::Builder; - - use super::*; - use crate::common::rocksdb_wrapper::{open_db, DB_VECTOR_CF}; - use crate::id_tracker::simple_id_tracker::SimpleIdTracker; - use crate::id_tracker::IdTracker; - use crate::payload_storage::payload_storage_enum::PayloadStorageEnum; - use crate::payload_storage::query_checker::SimpleConditionChecker; - use crate::payload_storage::simple_payload_storage::SimplePayloadStorage; - use crate::payload_storage::{ConditionChecker, PayloadStorage}; - use crate::types::{ - FieldCondition, GeoBoundingBox, GeoPoint, GeoRadius, PayloadField, Range, ValuesCount, - }; - - #[test] - fn test_nested_condition_checker() { - let dir = Builder::new().prefix("db_dir").tempdir().unwrap(); - let db = open_db(dir.path(), &[DB_VECTOR_CF]).unwrap(); - - let germany_id: u32 = 0; - let payload_germany: Payload = json!( - { - "country": { - "name": "Germany", - "capital": "Berlin", - "cities": [ - { - "name": "Berlin", - "population": 3.7, - "location": { - "lon": 13.76116, - "lat": 52.33826, - }, - "sightseeing": ["Brandenburg Gate", "Reichstag"] - }, - { - "name": "Munich", - "population": 1.5, - "location": { - "lon": 11.57549, - "lat": 48.13743, - }, - "sightseeing": ["Marienplatz", "Olympiapark"] - }, - { - "name": "Hamburg", - "population": 1.8, - "location": { - "lon": 9.99368, - "lat": 53.55108, - }, - "sightseeing": ["Reeperbahn", "Elbphilharmonie"] - } - ], - } - }) - .into(); - - let japan_id: u32 = 1; - let payload_japan: Payload = json!( - { - "country": { - "name": "Japan", - "capital": "Tokyo", - "cities": [ - { - "name": "Tokyo", - "population": 13.5, - "location": { - "lon": 139.69171, - "lat": 35.6895, - }, - "sightseeing": ["Tokyo Tower", "Tokyo Skytree", "Tokyo Disneyland"] - }, - { - "name": "Osaka", - "population": 2.7, - "location": { - "lon": 135.50217, - "lat": 34.69374, - }, - "sightseeing": ["Osaka Castle", "Universal Studios Japan"] - }, - { - "name": "Kyoto", - "population": 1.5, - "location": { - "lon": 135.76803, - "lat": 35.01163, - }, - "sightseeing": ["Kiyomizu-dera", "Fushimi Inari-taisha"] - } - ], - } - }) - .into(); - - let boring_id: u32 = 2; - let payload_boring: Payload = json!( - { - "country": { - "name": "Boring", - "cities": [ - { - "name": "Boring-ville", - "population": 0, - "sightseeing": [], - }, - ], - } - }) - .into(); - - let mut payload_storage: PayloadStorageEnum = - SimplePayloadStorage::open(db.clone()).unwrap().into(); - let mut id_tracker = SimpleIdTracker::open(db).unwrap(); - - // point 0 - Germany - id_tracker.set_link((germany_id as u64).into(), 0).unwrap(); - payload_storage.assign(0, &payload_germany).unwrap(); - - // point 1 - Japan - id_tracker.set_link((japan_id as u64).into(), 1).unwrap(); - payload_storage.assign(1, &payload_japan).unwrap(); - - // point 2 - Boring - id_tracker.set_link((boring_id as u64).into(), 2).unwrap(); - payload_storage.assign(2, &payload_boring).unwrap(); - - let payload_checker = SimpleConditionChecker::new( - Arc::new(AtomicRefCell::new(payload_storage)), - Arc::new(AtomicRefCell::new(id_tracker)), - ); - - // single must range condition nested field in array - let population_range_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must(Condition::Field(FieldCondition::new_range( - "population".to_string(), - Range { - lt: None, - gt: Some(8.0), - gte: None, - lte: None, - }, - ))), - )); - - assert!(!payload_checker.check(germany_id, &population_range_condition)); - assert!(payload_checker.check(japan_id, &population_range_condition)); - assert!(!payload_checker.check(boring_id, &population_range_condition)); - - // single must_not range condition nested field in array - let population_range_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must_not(Condition::Field(FieldCondition::new_range( - "population".to_string(), - Range { - lt: None, - gt: Some(8.0), - gte: None, - lte: None, - }, - ))), - )); - - assert!(payload_checker.check(germany_id, &population_range_condition)); // all cities less than 8.0 - assert!(payload_checker.check(japan_id, &population_range_condition)); // tokyo is 13.5, but other cities are less than 8.0 - assert!(payload_checker.check(boring_id, &population_range_condition)); // has no cities - - // single must values_count condition nested field in array - let sightseeing_value_count_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must(Condition::Field(FieldCondition::new_values_count( - "sightseeing".to_string(), - ValuesCount { - lt: None, - gt: None, - gte: Some(3), - lte: None, - }, - ))), - )); - - assert!(!payload_checker.check(germany_id, &sightseeing_value_count_condition)); - assert!(payload_checker.check(japan_id, &sightseeing_value_count_condition)); - assert!(!payload_checker.check(boring_id, &sightseeing_value_count_condition)); - - // single must_not values_count condition nested field in array - let sightseeing_value_count_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must_not(Condition::Field(FieldCondition::new_values_count( - "sightseeing".to_string(), - ValuesCount { - lt: None, - gt: None, - gte: Some(3), - lte: None, - }, - ))), - )); - - assert!(payload_checker.check(germany_id, &sightseeing_value_count_condition)); - assert!(payload_checker.check(japan_id, &sightseeing_value_count_condition)); - assert!(payload_checker.check(boring_id, &sightseeing_value_count_condition)); - - // single IsEmpty condition nested field in array - let is_empty_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must(Condition::IsEmpty(IsEmptyCondition { - is_empty: PayloadField { - key: "sightseeing".to_string(), - }, - })), - )); - - assert!(!payload_checker.check(germany_id, &is_empty_condition)); - assert!(!payload_checker.check(japan_id, &is_empty_condition)); - assert!(payload_checker.check(boring_id, &is_empty_condition)); - - // single IsNull condition nested field in array - let is_empty_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must(Condition::IsNull(IsNullCondition { - is_null: PayloadField { - key: "location".to_string(), - }, - })), - )); - - assert!(!payload_checker.check(germany_id, &is_empty_condition)); - assert!(!payload_checker.check(japan_id, &is_empty_condition)); - assert!(payload_checker.check(boring_id, &is_empty_condition)); - - // single geo-bounding box in nested field in array - let location_close_to_berlin_box_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must(Condition::Field(FieldCondition::new_geo_bounding_box( - "location".to_string(), - GeoBoundingBox { - top_left: GeoPoint { - lon: 13.08835, - lat: 52.67551, - }, - bottom_right: GeoPoint { - lon: 13.76117, - lat: 52.33825, - }, - }, - ))), - )); - - // Germany has a city whose location is within the box - assert!(payload_checker.check(germany_id, &location_close_to_berlin_box_condition)); - assert!(!payload_checker.check(japan_id, &location_close_to_berlin_box_condition)); - assert!(!payload_checker.check(boring_id, &location_close_to_berlin_box_condition)); - - // single geo-bounding box in nested field in array - let location_close_to_berlin_radius_condition = Filter::new_must(Condition::new_nested( - "country.cities".to_string(), - Filter::new_must(Condition::Field(FieldCondition::new_geo_radius( - "location".to_string(), - GeoRadius { - center: GeoPoint { - lon: 13.76117, - lat: 52.33825, - }, - radius: 1000.0, - }, - ))), - )); - - // Germany has a city whose location is within the radius - assert!(payload_checker.check(germany_id, &location_close_to_berlin_radius_condition)); - assert!(!payload_checker.check(japan_id, &location_close_to_berlin_radius_condition)); - assert!(!payload_checker.check(boring_id, &location_close_to_berlin_radius_condition)); - } -} diff --git a/lib/segment/src/payload_storage/query_checker.rs b/lib/segment/src/payload_storage/query_checker.rs index 490fadc9f18..2ecec4b379a 100644 --- a/lib/segment/src/payload_storage/query_checker.rs +++ b/lib/segment/src/payload_storage/query_checker.rs @@ -1,18 +1,19 @@ use std::cell::RefCell; +use std::collections::HashMap; use std::ops::Deref; use std::sync::Arc; use atomic_refcell::AtomicRefCell; -use crate::common::utils::JsonPathPayload; +use crate::common::utils::IndexesMap; use crate::id_tracker::IdTrackerSS; +use crate::index::field_index::FieldIndex; use crate::payload_storage::condition_checker::ValueChecker; -use crate::payload_storage::nested_query_checker::check_nested_filter; use crate::payload_storage::payload_storage_enum::PayloadStorageEnum; use crate::payload_storage::ConditionChecker; use crate::types::{ Condition, FieldCondition, Filter, IsEmptyCondition, IsNullCondition, OwnedPayloadRef, Payload, - PointOffsetType, + PayloadContainer, PayloadKeyType, PointOffsetType, }; fn check_condition(checker: &F, condition: &Condition) -> bool @@ -67,32 +68,60 @@ where } } -pub fn check_payload<'a, F>( - get_payload: F, - id_tracker: &IdTrackerSS, +pub fn select_nested_indexes<'a, R>( + nested_path: &str, + field_indexes: &'a HashMap, +) -> HashMap> +where + R: AsRef>, +{ + let nested_prefix = format!("{}.", nested_path); + let nested_indexes: HashMap<_, _> = field_indexes + .iter() + .filter_map(|(key, indexes)| { + key.strip_prefix(&nested_prefix) + .map(|key| (key.into(), indexes.as_ref())) + }) + .collect(); + nested_indexes +} + +pub fn check_payload<'a, R>( + get_payload: Box OwnedPayloadRef<'a> + 'a>, + id_tracker: Option<&IdTrackerSS>, query: &Filter, point_id: PointOffsetType, + field_indexes: &HashMap, ) -> bool where - F: Fn() -> OwnedPayloadRef<'a>, + R: AsRef>, { let checker = |condition: &Condition| match condition { Condition::Field(field_condition) => { - check_field_condition(field_condition, get_payload().deref()) + check_field_condition(field_condition, get_payload().deref(), field_indexes) } Condition::IsEmpty(is_empty) => check_is_empty_condition(is_empty, get_payload().deref()), Condition::IsNull(is_null) => check_is_null_condition(is_null, get_payload().deref()), - Condition::HasId(has_id) => { - let external_id = match id_tracker.external_id(point_id) { - None => return false, - Some(id) => id, - }; - has_id.has_id.contains(&external_id) - } + Condition::HasId(has_id) => id_tracker + .and_then(|id_tracker| id_tracker.external_id(point_id)) + .map_or(false, |id| has_id.has_id.contains(&id)), Condition::Nested(nested) => { - let nested_filter = nested.filter(); - let nested_path = JsonPathPayload::new(nested.array_key()); - check_nested_filter(&nested_path, nested_filter, &get_payload) + let nested_path = nested.array_key(); + let nested_indexes = select_nested_indexes(&nested_path, field_indexes); + get_payload() + .get_value(&nested_path) + .values() + .iter() + .filter_map(|value| value.as_object()) + .any(|object| { + check_payload( + Box::new(|| OwnedPayloadRef::from(object)), + None, + &nested.nested.filter, + point_id, + &nested_indexes, + ) + }) } Condition::Filter(_) => unreachable!(), }; @@ -100,22 +129,57 @@ where check_filter(&checker, query) } -pub fn check_is_empty_condition(is_empty: &IsEmptyCondition, payload: &Payload) -> bool { +pub fn check_is_empty_condition( + is_empty: &IsEmptyCondition, + payload: &impl PayloadContainer, +) -> bool { payload.get_value(&is_empty.is_empty.key).check_is_empty() } -pub fn check_is_null_condition(is_null: &IsNullCondition, payload: &Payload) -> bool { +pub fn check_is_null_condition(is_null: &IsNullCondition, payload: &impl PayloadContainer) -> bool { payload.get_value(&is_null.is_null.key).check_is_null() } -pub fn check_field_condition(field_condition: &FieldCondition, payload: &Payload) -> bool { +pub fn check_field_condition( + field_condition: &FieldCondition, + payload: &impl PayloadContainer, + field_indexes: &HashMap, +) -> bool +where + R: AsRef>, +{ let field_values = payload.get_value(&field_condition.key); - - let mut res = false; - for p in field_values { - res |= field_condition.check(p); + let field_indexes = field_indexes.get(&field_condition.key); + + // This covers a case, when a field index affects the result of the condition. + if let Some(field_indexes) = field_indexes { + for p in field_values { + let mut index_checked = false; + for index in field_indexes.as_ref() { + if let Some(index_check_res) = index.check_condition(field_condition, p) { + if index_check_res { + // If at least one object matches the condition, we can return true + return true; + } + index_checked = true; + // If index check of the condition returned something, we don't need to check + // other indexes + break; + } + } + if !index_checked { + // If none of the indexes returned anything, we need to check the condition + // against the payload + if field_condition.check(p) { + return true; + } + } + } + false + } else { + // Fallback to regular condition check if there are no indexes for the field + field_values.into_iter().any(|p| field_condition.check(p)) } - res } /// Only used for testing @@ -143,8 +207,10 @@ impl ConditionChecker for SimpleConditionChecker { let payload_storage_guard = self.payload_storage.borrow(); let payload_ref_cell: RefCell> = RefCell::new(None); + let id_tracker = self.id_tracker.borrow(); + check_payload( - || { + Box::new(|| { if payload_ref_cell.borrow().is_none() { let payload_ptr = match payload_storage_guard.deref() { PayloadStorageEnum::InMemoryPayloadStorage(s) => { @@ -173,16 +239,15 @@ impl ConditionChecker for SimpleConditionChecker { } }; - payload_ref_cell.replace(Some(match payload_ptr { - None => (&self.empty_payload).into(), - Some(x) => x, - })); + payload_ref_cell + .replace(payload_ptr.or_else(|| Some((&self.empty_payload).into()))); } payload_ref_cell.borrow().as_ref().cloned().unwrap() - }, - self.id_tracker.borrow().deref(), + }), + Some(id_tracker.deref()), query, point_id, + &IndexesMap::new(), ) } } diff --git a/lib/segment/src/types.rs b/lib/segment/src/types.rs index 73997e1479b..428f249bb5e 100644 --- a/lib/segment/src/types.rs +++ b/lib/segment/src/types.rs @@ -664,6 +664,10 @@ impl TryFrom for GeoPoint { } } +pub trait PayloadContainer { + fn get_value(&self, path: &str) -> MultiValue<&Value>; +} + #[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct Payload(pub Map); @@ -677,10 +681,6 @@ impl Payload { } } - pub fn get_value(&self, path: &str) -> MultiValue<&Value> { - utils::get_value_from_json_map(path, &self.0) - } - pub fn remove(&mut self, path: &str) -> Vec { utils::remove_value_from_json_map(path, &mut self.0).values() } @@ -702,6 +702,24 @@ impl Payload { } } +impl PayloadContainer for Map { + fn get_value(&self, path: &str) -> MultiValue<&Value> { + utils::get_value_from_json_map(path, self) + } +} + +impl PayloadContainer for Payload { + fn get_value(&self, path: &str) -> MultiValue<&Value> { + utils::get_value_from_json_map(path, &self.0) + } +} + +impl<'a> PayloadContainer for OwnedPayloadRef<'a> { + fn get_value(&self, path: &str) -> MultiValue<&Value> { + utils::get_value_from_json_map(path, self.deref()) + } +} + impl Default for Payload { fn default() -> Self { Payload(Map::new()) @@ -734,12 +752,12 @@ impl From> for Payload { #[derive(Clone)] pub enum OwnedPayloadRef<'a> { - Ref(&'a Payload), - Owned(Rc), + Ref(&'a Map), + Owned(Rc>), } impl<'a> Deref for OwnedPayloadRef<'a> { - type Target = Payload; + type Target = Map; fn deref(&self) -> &Self::Target { match self { @@ -749,8 +767,8 @@ impl<'a> Deref for OwnedPayloadRef<'a> { } } -impl<'a> AsRef for OwnedPayloadRef<'a> { - fn as_ref(&self) -> &Payload { +impl<'a> AsRef> for OwnedPayloadRef<'a> { + fn as_ref(&self) -> &Map { match self { OwnedPayloadRef::Ref(reference) => reference, OwnedPayloadRef::Owned(owned) => owned.deref(), @@ -760,12 +778,24 @@ impl<'a> AsRef for OwnedPayloadRef<'a> { impl<'a> From for OwnedPayloadRef<'a> { fn from(payload: Payload) -> Self { + OwnedPayloadRef::Owned(Rc::new(payload.0)) + } +} + +impl<'a> From> for OwnedPayloadRef<'a> { + fn from(payload: Map) -> Self { OwnedPayloadRef::Owned(Rc::new(payload)) } } impl<'a> From<&'a Payload> for OwnedPayloadRef<'a> { fn from(payload: &'a Payload) -> Self { + OwnedPayloadRef::Ref(&payload.0) + } +} + +impl<'a> From<&'a Map> for OwnedPayloadRef<'a> { + fn from(payload: &'a Map) -> Self { OwnedPayloadRef::Ref(payload) } } @@ -1038,7 +1068,7 @@ impl From> for Match { } /// Range filter request -#[derive(Debug, Deserialize, Serialize, JsonSchema, Clone, PartialEq)] +#[derive(Debug, Deserialize, Serialize, JsonSchema, Default, Clone, PartialEq)] #[serde(rename_all = "snake_case")] pub struct Range { /// point.key < range.lt diff --git a/lib/segment/tests/nested_filtering_test.rs b/lib/segment/tests/nested_filtering_test.rs index d0005c70f5d..a60b20d21ea 100644 --- a/lib/segment/tests/nested_filtering_test.rs +++ b/lib/segment/tests/nested_filtering_test.rs @@ -11,6 +11,7 @@ mod tests { use segment::payload_storage::PayloadStorage; use segment::types::{ Condition, FieldCondition, Filter, Match, Payload, PayloadSchemaType, PointOffsetType, + Range, }; use serde_json::json; use tempfile::Builder; @@ -95,112 +96,159 @@ mod tests { .set_indexed("arr1[].text", PayloadSchemaType::Text.into()) .unwrap(); - let nested_condition_1 = Condition::new_nested( - "arr1", - Filter { - must: Some(vec![ - // E.g. idx = 6 => { "a" = 1, "b" = 7, "c" = 1, "d" = 0 } - Condition::Field(FieldCondition::new_match("a", 1.into())), - Condition::Field(FieldCondition::new_match("c", 1.into())), - Condition::Field(FieldCondition::new_match("d", 0.into())), - ]), - should: None, - must_not: None, - }, - ); + { + let nested_condition_0 = Condition::new_nested( + "arr1", + Filter { + must: Some(vec![ + // E.g. idx = 6 => { "a" = 1, "b" = 7, "c" = 1, "d" = 0 } + Condition::Field(FieldCondition::new_match("a", 1.into())), + Condition::Field(FieldCondition::new_match("c", 1.into())), + ]), + should: None, + must_not: Some(vec![Condition::Field(FieldCondition::new_range( + "d", + Range { + lte: Some(1.into()), + ..Default::default() + }, + ))]), + }, + ); - let nested_filter_1 = Filter::new_must(nested_condition_1); + let nested_filter_0 = Filter::new_must(nested_condition_0); + let res0: Vec<_> = index.query_points(&nested_filter_0).collect(); - let res1: Vec<_> = index.query_points(&nested_filter_1).collect(); + let filter_context = index.filter_context(&nested_filter_0); - let filter_context = index.filter_context(&nested_filter_1); + let check_res0: Vec<_> = (0..NUM_POINTS as PointOffsetType) + .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) + .collect(); - let check_res1: Vec<_> = (0..NUM_POINTS as PointOffsetType) - .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) - .collect(); + assert_eq!(res0, check_res0); + assert!(!res0.is_empty()); - assert_eq!(res1, check_res1); + // i % 2 + 1 == 1 + // i % 3 == 2 - assert!(!res1.is_empty()); - assert!(res1.contains(&6)); + // result = 2, 8, 14, ... + assert!(res0.contains(&2)); + assert!(res0.contains(&8)); + assert!(res0.contains(&14)); + } - let nested_condition_2 = Condition::new_nested( - "arr1", - Filter { - must: Some(vec![ - // E.g. idx = 6 => { "a" = 1, "b" = 7, "c" = 1, "d" = 0 } - Condition::Field(FieldCondition::new_match("a", 1.into())), - Condition::Field(FieldCondition::new_match( - "text", - Match::Text("c1".to_string().into()), - )), - Condition::Field(FieldCondition::new_match("d", 0.into())), - ]), - should: None, - must_not: None, - }, - ); + { + let nested_condition_1 = Condition::new_nested( + "arr1", + Filter { + must: Some(vec![ + // E.g. idx = 6 => { "a" = 1, "b" = 7, "c" = 1, "d" = 0 } + Condition::Field(FieldCondition::new_match("a", 1.into())), + Condition::Field(FieldCondition::new_match("c", 1.into())), + Condition::Field(FieldCondition::new_match("d", 0.into())), + ]), + should: None, + must_not: None, + }, + ); + + let nested_filter_1 = Filter::new_must(nested_condition_1); - let nested_filter_2 = Filter::new_must(nested_condition_2); + let res1: Vec<_> = index.query_points(&nested_filter_1).collect(); - let res2: Vec<_> = index.query_points(&nested_filter_2).collect(); + let filter_context = index.filter_context(&nested_filter_1); - let filter_context = index.filter_context(&nested_filter_2); + let check_res1: Vec<_> = (0..NUM_POINTS as PointOffsetType) + .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) + .collect(); - let check_res2: Vec<_> = (0..NUM_POINTS as PointOffsetType) - .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) - .collect(); + assert_eq!(res1, check_res1); - assert_eq!(res2, check_res2); + assert!(!res1.is_empty()); + assert!(res1.contains(&6)); + } - assert!(!res2.is_empty()); + { + let nested_condition_2 = Condition::new_nested( + "arr1", + Filter { + must: Some(vec![ + // E.g. idx = 6 => { "a" = 1, "b" = 7, "c" = 1, "d" = 0 } + Condition::Field(FieldCondition::new_match("a", 1.into())), + Condition::Field(FieldCondition::new_match( + "text", + Match::Text("c1".to_string().into()), + )), + Condition::Field(FieldCondition::new_match("d", 0.into())), + ]), + should: None, + must_not: None, + }, + ); + + let nested_filter_2 = Filter::new_must(nested_condition_2); + + let res2: Vec<_> = index.query_points(&nested_filter_2).collect(); + + let filter_context = index.filter_context(&nested_filter_2); + + let check_res2: Vec<_> = (0..NUM_POINTS as PointOffsetType) + .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) + .collect(); + + assert_eq!(res2, check_res2); + + assert!(!res2.is_empty()); + } - let nested_condition_3 = Condition::new_nested( - "arr1", - Filter { - must: Some(vec![Condition::Field(FieldCondition::new_match( - "b", - 1.into(), - ))]), - should: None, - must_not: None, - }, - ); - - let nester_condition_3_1 = Condition::new_nested( - "arr2", - Filter { - must: Some(vec![Condition::new_nested( - "arr3", - Filter { - must: Some(vec![Condition::Field(FieldCondition::new_match( - "b", - 10.into(), - ))]), - should: None, - must_not: None, - }, - )]), + { + let nested_condition_3 = Condition::new_nested( + "arr1", + Filter { + must: Some(vec![Condition::Field(FieldCondition::new_match( + "b", + 1.into(), + ))]), + should: None, + must_not: None, + }, + ); + + let nester_condition_3_1 = Condition::new_nested( + "arr2", + Filter { + must: Some(vec![Condition::new_nested( + "arr3", + Filter { + must: Some(vec![Condition::Field(FieldCondition::new_match( + "b", + 10.into(), + ))]), + should: None, + must_not: None, + }, + )]), + should: None, + must_not: None, + }, + ); + + let nested_filter_3 = Filter { + must: Some(vec![nested_condition_3, nester_condition_3_1]), should: None, must_not: None, - }, - ); - - let nested_filter_3 = Filter { - must: Some(vec![nested_condition_3, nester_condition_3_1]), - should: None, - must_not: None, - }; + }; - let res3: Vec<_> = index.query_points(&nested_filter_3).collect(); + let res3: Vec<_> = index.query_points(&nested_filter_3).collect(); - let filter_context = index.filter_context(&nested_filter_3); + let filter_context = index.filter_context(&nested_filter_3); - let check_res3: Vec<_> = (0..NUM_POINTS as PointOffsetType) - .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) - .collect(); + let check_res3: Vec<_> = (0..NUM_POINTS as PointOffsetType) + .filter(|point_id| filter_context.check(*point_id as PointOffsetType)) + .collect(); - assert_eq!(res3, check_res3); - assert!(!res3.is_empty()); + assert_eq!(res3, check_res3); + assert!(!res3.is_empty()); + } } } diff --git a/openapi/tests/openapi_integration/test_nested_payload_indexing.py b/openapi/tests/openapi_integration/test_nested_payload_indexing.py index 0bcfe79225e..1170dce45b8 100644 --- a/openapi/tests/openapi_integration/test_nested_payload_indexing.py +++ b/openapi/tests/openapi_integration/test_nested_payload_indexing.py @@ -203,14 +203,18 @@ def nested_payload_collection_setup(collection_name, on_disk_payload=False): "vector": [0.24, 0.18, 0.22, 0.44], "payload": { "country": { - "name": "Nauru", # Null capital for testing - "cities": [], # Empty data for testing + "name": "Nauru", + "cities": [ + { + "name": None, # Null capital for testing + } + ], } } }, { "id": 6, - "vector": [0.35, 0.08, 0.11, 0.44] + "vector": [0.35, 0.08, 0.11, 0.44], } ] } @@ -269,7 +273,7 @@ def test_nested_payload_indexing_operations(): assert response.json()['result']['payload_schema']['country.capital']['data_type'] == "keyword" assert response.json()['result']['payload_schema']['country.capital']['points'] == 4 assert response.json()['result']['payload_schema']['country.cities[].population']['data_type'] == "float" - assert response.json()['result']['payload_schema']['country.cities[].population']['points'] == 4 # indexed records + assert response.json()['result']['payload_schema']['country.cities[].population']['points'] == 4 # indexed records # Search nested through with payload index response = request_with_validation( @@ -326,7 +330,7 @@ def test_nested_payload_indexing_operations(): "filter": { "should": [ { - "key": "country.cities.population", # Do not implicitly do inside nested array + "key": "country.cities.population", # Do not implicitly do inside nested array "range": { "gte": 9.0, } @@ -456,4 +460,3 @@ def test_nested_payload_indexing_operations(): ) assert response.ok assert len(response.json()['result']['payload_schema']) == 0 - diff --git a/openapi/tests/openapi_integration/test_nested_payload_query.py b/openapi/tests/openapi_integration/test_nested_payload_query.py index 4dacc037f47..245b92cd4b4 100644 --- a/openapi/tests/openapi_integration/test_nested_payload_query.py +++ b/openapi/tests/openapi_integration/test_nested_payload_query.py @@ -135,10 +135,54 @@ def test_nested_payload_indexing_operations(): "key": "country.cities", "filter": { "must": [ + { + "must": [ + { + "key": "sightseeing", + "values_count": { + "gt": 3 + } + } + ] + } + ] + } + } + } + ] + }, + "limit": 3 + } + ) + assert response.ok + assert len(response.json()['result']['points']) == 1 + assert response.json()['result']['points'][0]['payload']['country']['name'] == "France" + + # Search with nested filter and must-not + response = request_with_validation( + api='/collections/{collection_name}/points/scroll', + method="POST", + path_params={'collection_name': collection_name}, + body={ + "filter": { + "must": [ + { + "nested": { + "key": "country.cities", + "filter": { + "must": [ + { + "key": "population", + "range": { + "gte": 9.0, + } + } + ], + "must_not": [ { "key": "sightseeing", "values_count": { - "gt": 3 + "gt": 1 } } ] @@ -151,8 +195,7 @@ def test_nested_payload_indexing_operations(): } ) assert response.ok - assert len(response.json()['result']['points']) == 1 - assert response.json()['result']['points'][0]['payload']['country']['name'] == "France" + assert len(response.json()['result']['points']) == 0 # Search with nested filter on indexed payload response = request_with_validation( @@ -283,8 +326,13 @@ def test_nested_payload_indexing_operations(): "range": { "lt": 8.0, } + }, + { + "is_null": { + "key": "name" + } } - ] + ], } } } @@ -296,8 +344,8 @@ def test_nested_payload_indexing_operations(): assert response.ok assert len(response.json()['result']['points']) == 2 # Only 2 records that do NOT have at least one city population < 8.0 - assert response.json()['result']['points'][0]['payload']['country']['name'] == "England" #London - assert response.json()['result']['points'][1]['payload']['country']['name'] == "Japan" #Tokyo + assert response.json()['result']['points'][0]['payload']['country']['name'] == "England" # London + assert response.json()['result']['points'][1]['payload']['country']['name'] == "Japan" # Tokyo # Search with nested filter on indexed & non payload (must_not) response = request_with_validation( @@ -323,6 +371,11 @@ def test_nested_payload_indexing_operations(): "values_count": { "gte": 3 } + }, + { + "is_null": { + "key": "name" + } } ] } @@ -336,7 +389,7 @@ def test_nested_payload_indexing_operations(): assert response.ok assert len(response.json()['result']['points']) == 1 # Only 1 record that do NOT have at least one city population < 8.0 and NOT more than 3 sightseeing - assert response.json()['result']['points'][0]['payload']['country']['name'] == "Japan" #Tokyo + assert response.json()['result']['points'][0]['payload']['country']['name'] == "Japan" # Tokyo # Search nested null field response = request_with_validation( @@ -366,9 +419,8 @@ def test_nested_payload_indexing_operations(): } ) assert response.ok - assert len(response.json()['result']['points']) == 2 + assert len(response.json()['result']['points']) == 1 assert response.json()['result']['points'][0]['payload']['country']['name'] == "Nauru" - assert response.json()['result']['points'][1]['id'] == 6 # Search nested geobox field geo_box_berlin = {