Skip to content

Commit

Permalink
feat: add "any" to "match" statements (qdrant#1466)
Browse files Browse the repository at this point in the history
* feat: add "any" to "match" statements

Adds support for using "any" to match on a list of different possible values

* enable proper counting in cardinality estimation

* fmt

* upd OpenAPI

---------

Co-authored-by: Andrey Vasnetsov <andrey@vasnetsov.com>
  • Loading branch information
coszio and generall committed Mar 15, 2023
1 parent 5149d39 commit c414a77
Show file tree
Hide file tree
Showing 10 changed files with 309 additions and 7 deletions.
34 changes: 34 additions & 0 deletions docs/grpc/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@
- [RecommendBatchResponse](#qdrant-RecommendBatchResponse)
- [RecommendPoints](#qdrant-RecommendPoints)
- [RecommendResponse](#qdrant-RecommendResponse)
- [RepeatedIntegers](#qdrant-RepeatedIntegers)
- [RepeatedStrings](#qdrant-RepeatedStrings)
- [RetrievedPoint](#qdrant-RetrievedPoint)
- [RetrievedPoint.PayloadEntry](#qdrant-RetrievedPoint-PayloadEntry)
- [ScoredPoint](#qdrant-ScoredPoint)
Expand Down Expand Up @@ -1293,6 +1295,8 @@ The JSON representation for `Value` is JSON value.
| integer | [int64](#int64) | | Match integer |
| boolean | [bool](#bool) | | Match boolean |
| text | [string](#string) | | Match text |
| keywords | [RepeatedStrings](#qdrant-RepeatedStrings) | | Match multiple keywords |
| integers | [RepeatedIntegers](#qdrant-RepeatedIntegers) | | Match multiple integers |



Expand Down Expand Up @@ -1566,6 +1570,36 @@ The JSON representation for `Value` is JSON value.



<a name="qdrant-RepeatedIntegers"></a>

### RepeatedIntegers



| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| integers | [int64](#int64) | repeated | |






<a name="qdrant-RepeatedStrings"></a>

### RepeatedStrings



| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| strings | [string](#string) | repeated | |






<a name="qdrant-RetrievedPoint"></a>

### RetrievedPoint
Expand Down
32 changes: 32 additions & 0 deletions docs/redoc/master/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -4148,6 +4148,9 @@
},
{
"$ref": "#/components/schemas/MatchText"
},
{
"$ref": "#/components/schemas/MatchAny"
}
]
},
Expand Down Expand Up @@ -4189,6 +4192,35 @@
}
}
},
"MatchAny": {
"description": "Exact match on any of the given values",
"type": "object",
"required": [
"any"
],
"properties": {
"any": {
"$ref": "#/components/schemas/AnyVariants"
}
}
},
"AnyVariants": {
"anyOf": [
{
"type": "array",
"items": {
"type": "string"
}
},
{
"type": "array",
"items": {
"type": "integer",
"format": "int64"
}
}
]
},
"Range": {
"description": "Range filter request",
"type": "object",
Expand Down
11 changes: 11 additions & 0 deletions lib/api/src/grpc/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use segment::types::{PayloadSelector, WithPayloadInterface};
use tonic::Status;
use uuid::Uuid;

use super::qdrant::{RepeatedIntegers, RepeatedStrings};
use crate::grpc::models::{CollectionsResponse, VersionInfo};
use crate::grpc::qdrant::condition::ConditionOneOf;
use crate::grpc::qdrant::payload_index_params::IndexParams;
Expand Down Expand Up @@ -765,6 +766,8 @@ impl TryFrom<Match> for segment::types::Match {
MatchValue::Integer(int) => int.into(),
MatchValue::Boolean(flag) => flag.into(),
MatchValue::Text(text) => segment::types::Match::Text(text.into()),
MatchValue::Keywords(kwds) => kwds.strings.into(),
MatchValue::Integers(ints) => ints.integers.into(),
}),
_ => Err(Status::invalid_argument("Malformed Match condition")),
}
Expand All @@ -782,6 +785,14 @@ impl From<segment::types::Match> for Match {
segment::types::Match::Text(segment::types::MatchText { text }) => {
MatchValue::Text(text)
}
segment::types::Match::Any(any) => match any.any {
segment::types::AnyVariants::Keywords(strings) => {
MatchValue::Keywords(RepeatedStrings { strings })
}
segment::types::AnyVariants::Integers(integers) => {
MatchValue::Integers(RepeatedIntegers { integers })
}
},
};
Self {
match_value: Some(match_value),
Expand Down
10 changes: 10 additions & 0 deletions lib/api/src/grpc/proto/points.proto
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,19 @@ message Match {
int64 integer = 2; // Match integer
bool boolean = 3; // Match boolean
string text = 4; // Match text
RepeatedStrings keywords = 5; // Match multiple keywords
RepeatedIntegers integers = 6; // Match multiple integers
}
}

// Wrapper message around a list of strings.
// Used by the `keywords` option of `Match` ("Match multiple keywords").
message RepeatedStrings {
repeated string strings = 1; // String values carried by this message
}

// Wrapper message around a list of 64-bit signed integers.
// Used by the `integers` option of `Match` ("Match multiple integers").
message RepeatedIntegers {
repeated int64 integers = 1; // Integer values carried by this message
}

message Range {
optional double lt = 1;
optional double gt = 2;
Expand Down
20 changes: 19 additions & 1 deletion lib/api/src/grpc/qdrant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2294,7 +2294,7 @@ pub struct FieldCondition {
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Match {
#[prost(oneof = "r#match::MatchValue", tags = "1, 2, 3, 4")]
#[prost(oneof = "r#match::MatchValue", tags = "1, 2, 3, 4, 5, 6")]
pub match_value: ::core::option::Option<r#match::MatchValue>,
}
/// Nested message and enum types in `Match`.
Expand All @@ -2314,10 +2314,28 @@ pub mod r#match {
/// Match text
#[prost(string, tag = "4")]
Text(::prost::alloc::string::String),
/// Match multiple keywords
#[prost(message, tag = "5")]
Keywords(super::RepeatedStrings),
/// Match multiple integers
#[prost(message, tag = "6")]
Integers(super::RepeatedIntegers),
}
}
/// Protobuf message wrapping a repeated `string` field (tag 1).
///
/// Carried by the `Keywords` variant of `r#match::MatchValue`
/// ("match multiple keywords").
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct RepeatedStrings {
/// String values carried by this message.
#[prost(string, repeated, tag = "1")]
pub strings: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
}
/// Protobuf message wrapping a repeated `int64` field (tag 1).
///
/// Carried by the `Integers` variant of `r#match::MatchValue`
/// ("match multiple integers").
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct RepeatedIntegers {
/// Integer values carried by this message.
#[prost(int64, repeated, tag = "1")]
pub integers: ::prost::alloc::vec::Vec<i64>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Range {
#[prost(double, optional, tag = "1")]
pub lt: ::core::option::Option<f64>,
Expand Down
29 changes: 27 additions & 2 deletions lib/segment/src/index/field_index/map_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ use crate::entry::entry_point::{OperationError, OperationResult};
use crate::index::field_index::{
CardinalityEstimation, PayloadBlockCondition, PayloadFieldIndex, PrimaryCondition, ValueIndexer,
};
use crate::index::query_estimator::combine_should_estimations;
use crate::telemetry::PayloadIndexTelemetry;
use crate::types::{
FieldCondition, IntPayloadType, Match, MatchValue, PayloadKeyType, PointOffsetType,
ValueVariants,
AnyVariants, FieldCondition, IntPayloadType, Match, MatchAny, MatchValue, PayloadKeyType,
PointOffsetType, ValueVariants,
};

/// HashMap-based type of index
Expand Down Expand Up @@ -219,6 +220,18 @@ impl PayloadFieldIndex for MapIndex<String> {
.push(PrimaryCondition::Condition(condition.clone()));
Some(estimation)
}
Some(Match::Any(MatchAny {
any: AnyVariants::Keywords(keywords),
})) => {
let estimations = keywords
.iter()
.map(|keyword| self.match_cardinality(keyword))
.collect::<Vec<_>>();
Some(combine_should_estimations(
&estimations,
self.indexed_points,
))
}
_ => None,
}
}
Expand Down Expand Up @@ -284,6 +297,18 @@ impl PayloadFieldIndex for MapIndex<IntPayloadType> {
.push(PrimaryCondition::Condition(condition.clone()));
Some(estimation)
}
Some(Match::Any(MatchAny {
any: AnyVariants::Integers(integers),
})) => {
let estimations = integers
.iter()
.map(|integer| self.match_cardinality(integer))
.collect::<Vec<_>>();
Some(combine_should_estimations(
&estimations,
self.indexed_points,
))
}
_ => None,
}
}
Expand Down
25 changes: 22 additions & 3 deletions lib/segment/src/index/query_optimization/condition_converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use crate::index::query_optimization::optimizer::IndexesMap;
use crate::index::query_optimization::payload_provider::PayloadProvider;
use crate::payload_storage::query_checker::{check_field_condition, check_is_empty_condition};
use crate::types::{
Condition, FieldCondition, FloatPayloadType, GeoBoundingBox, GeoRadius, Match, MatchText,
MatchValue, PointOffsetType, Range, ValueVariants,
AnyVariants, Condition, FieldCondition, FloatPayloadType, GeoBoundingBox, GeoRadius, Match,
MatchAny, MatchText, MatchValue, PointOffsetType, Range, ValueVariants,
};

pub fn condition_converter<'a>(
Expand Down Expand Up @@ -169,7 +169,7 @@ pub fn get_match_checkers(index: &FieldIndex, cond_match: Match) -> Option<Condi
}
}))
}
(_, _) => None,
_ => None,
},
Match::Text(MatchText { text }) => match index {
FieldIndex::FullTextIndex(full_text_index) => {
Expand All @@ -183,5 +183,24 @@ pub fn get_match_checkers(index: &FieldIndex, cond_match: Match) -> Option<Condi
}
_ => None,
},
Match::Any(MatchAny { any }) => match (any, index) {
(AnyVariants::Keywords(list), FieldIndex::KeywordIndex(index)) => {
Some(Box::new(move |point_id: PointOffsetType| {
match index.get_values(point_id) {
None => false,
Some(values) => values.iter().any(|k| list.contains(k)),
}
}))
}
(AnyVariants::Integers(list), FieldIndex::IntMapIndex(index)) => {
Some(Box::new(move |point_id: PointOffsetType| {
match index.get_values(point_id) {
None => false,
Some(values) => values.iter().any(|i| list.contains(i)),
}
}))
}
_ => None,
},
}
}
11 changes: 10 additions & 1 deletion lib/segment/src/payload_storage/condition_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
use serde_json::Value;

use crate::types::{
GeoBoundingBox, GeoRadius, Match, MatchText, MatchValue, Range, ValueVariants, ValuesCount,
AnyVariants, GeoBoundingBox, GeoRadius, Match, MatchAny, MatchText, MatchValue, Range,
ValueVariants, ValuesCount,
};

pub trait ValueChecker {
Expand Down Expand Up @@ -32,6 +33,14 @@ impl ValueChecker for Match {
Value::String(stored) => stored.contains(text),
_ => false,
},
Match::Any(MatchAny { any }) => match (payload, any) {
(Value::String(stored), AnyVariants::Keywords(list)) => list.contains(stored),
(Value::Number(stored), AnyVariants::Integers(list)) => stored
.as_i64()
.map(|num| list.contains(&num))
.unwrap_or(false),
_ => false,
},
}
}
}
Expand Down
Loading

0 comments on commit c414a77

Please sign in to comment.