Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite nested filters again #1935

Merged
merged 12 commits into from
May 22, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ mod tests {
use itertools::Itertools;
use parking_lot::RwLock;
use segment::entry::entry_point::SegmentEntry;
use segment::types::{Distance, PayloadSchemaType};
use segment::types::{Distance, PayloadContainer, PayloadSchemaType};
use serde_json::{json, Value};
use tempfile::Builder;

Expand Down
4 changes: 3 additions & 1 deletion lib/collection/src/grouping/aggregator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use itertools::Itertools;
use ordered_float::OrderedFloat;
use segment::data_types::groups::GroupId;
use segment::spaces::tools::{peek_top_largest_iterable, peek_top_smallest_iterable};
use segment::types::{ExtendedPointId, Order, PointIdType, ScoreType, ScoredPoint};
use segment::types::{
ExtendedPointId, Order, PayloadContainer, PointIdType, ScoreType, ScoredPoint,
};
use serde_json::Value;

use super::types::AggregatorError::{self, *};
Expand Down
2 changes: 1 addition & 1 deletion lib/collection/src/operations/payload_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ impl SplitByShard for SetPayload {

#[cfg(test)]
mod tests {
use segment::types::Payload;
use segment::types::{Payload, PayloadContainer};
use serde_json::Value;

use super::*;
Expand Down
2 changes: 1 addition & 1 deletion lib/collection/tests/collection_restore_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use collection::operations::point_ops::{
use collection::operations::types::ScrollRequest;
use collection::operations::CollectionUpdateOperations;
use itertools::Itertools;
use segment::types::{PayloadSelectorExclude, WithPayloadInterface};
use segment::types::{PayloadContainer, PayloadSelectorExclude, WithPayloadInterface};
use serde_json::Value;
use tempfile::Builder;

Expand Down
59 changes: 55 additions & 4 deletions lib/segment/src/index/query_optimization/condition_converter.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
use std::collections::HashSet;

use serde_json::Value;

use crate::common::utils::IndexesMap;
use crate::id_tracker::IdTrackerSS;
use crate::index::field_index::FieldIndex;
use crate::index::query_optimization::optimized_filter::ConditionCheckerFn;
use crate::index::query_optimization::payload_provider::PayloadProvider;
use crate::payload_storage::query_checker::{
check_field_condition, check_is_empty_condition, check_is_null_condition,
check_field_condition, check_is_empty_condition, check_is_null_condition, check_payload,
select_nested_indexes,
};
use crate::types::{
AnyVariants, Condition, FieldCondition, FloatPayloadType, GeoBoundingBox, GeoRadius, Match,
MatchAny, MatchText, MatchValue, PointOffsetType, Range, ValueVariants,
MatchAny, MatchText, MatchValue, OwnedPayloadRef, PayloadContainer, PointOffsetType, Range,
ValueVariants,
};

pub fn condition_converter<'a>(
Expand All @@ -30,7 +34,7 @@ pub fn condition_converter<'a>(
.unwrap_or_else(|| {
Box::new(move |point_id| {
payload_provider.with_payload(point_id, |payload| {
check_field_condition(field_condition, &payload)
check_field_condition(field_condition, &payload, field_indexes)
})
})
}),
Expand All @@ -56,8 +60,55 @@ pub fn condition_converter<'a>(
.collect();
Box::new(move |point_id| segment_ids.contains(&point_id))
}
Condition::Nested(nested) => {
// Select indexes for nested fields. Trim nested part from key, so
// that nested condition can address fields without nested part.

// Example:
// Index for field `nested.field` will be stored under key `nested.field`
// And we have a query:
// {
// "nested": {
// "path": "nested",
// "filter": {
// ...
// "match": {"key": "field", "value": "value"}
// }
// }

// In this case we want to use `nested.field`, but we only have `field` in query.
// Therefore we need to trim `nested` part from key. So that query executor
// can address proper index for nested field.
let nested_path = nested.array_key();

let nested_indexes = select_nested_indexes(&nested_path, field_indexes);

Box::new(move |point_id| {
payload_provider.with_payload(point_id, |payload| {
let field_values = payload.get_value(&nested_path).values();

for value in field_values {
if let Value::Object(object) = value {
let get_payload = || OwnedPayloadRef::from(object);
if check_payload(
Box::new(get_payload),
// None because has_id in nested is not supported. So retrieving
// IDs throug the tracker would always return None.
None,
&nested.nested.filter,
point_id,
&nested_indexes,
) {
// If at least one nested object matches, return true
return true;
}
}
}
false
Comment on lines +88 to +107
Copy link
Contributor

@coszio coszio May 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wanted to add this to #1940 but got merged first :)

Suggested change
let field_values = payload.get_value(&nested_path).values();
for value in field_values {
if let Value::Object(object) = value {
let get_payload = || OwnedPayloadRef::from(object);
if check_payload(
Box::new(get_payload),
// None because has_id in nested is not supported. So retrieving
// IDs throug the tracker would always return None.
None,
&nested.nested.filter,
point_id,
&nested_indexes,
) {
// If at least one nested object matches, return true
return true;
}
}
}
false
payload
.get_value(&nested_path)
.values()
.iter()
// Only use objects
.filter_map(|value| value.as_object())
// If at least one nested object matches, return true
.any(|object| {
check_payload(
Box::new(|| OwnedPayloadRef::from(object)),
// None because has_id in nested is not supported. So retrieving
// IDs throug the tracker would always return None.
None,
&nested.nested.filter,
point_id,
&nested_indexes,
)
})

})
})
}
Condition::Filter(_) => unreachable!(),
Condition::Nested(_) => unreachable!(),
}
}

Expand Down
2 changes: 0 additions & 2 deletions lib/segment/src/index/query_optimization/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
pub mod condition_converter;
pub mod nested_filter;
pub mod nested_optimizer;
pub mod optimized_filter;
pub mod optimizer;
pub mod payload_provider;
223 changes: 0 additions & 223 deletions lib/segment/src/index/query_optimization/nested_filter.rs

This file was deleted.

Loading