From 2516bb192eb872f8466f4acd558bf6391dff4db7 Mon Sep 17 00:00:00 2001 From: Chi Fujii Date: Fri, 12 May 2023 23:08:08 +0900 Subject: [PATCH] Support dot notation in projection (#2536) Closes #2430. --- .../aggregate_documents_compat_test.go | 29 +- integration/query_compat_test.go | 20 +- integration/query_projection_compat_test.go | 72 ++++- integration/shareddata/composites.go | 46 ++++ integration/shareddata/shareddata.go | 1 + internal/handlers/common/projection.go | 256 +++++++++++++++++- 6 files changed, 396 insertions(+), 28 deletions(-) diff --git a/integration/aggregate_documents_compat_test.go b/integration/aggregate_documents_compat_test.go index 80615ca43141..f1ae0488e50d 100644 --- a/integration/aggregate_documents_compat_test.go +++ b/integration/aggregate_documents_compat_test.go @@ -925,7 +925,9 @@ func TestAggregateCompatGroupSum(t *testing.T) { Remove("ArrayAndDocuments"). // TODO: handle $sum of doubles near max precision. // https://github.com/FerretDB/FerretDB/issues/2300 - Remove("Doubles") + Remove("Doubles"). + // TODO: https://github.com/FerretDB/FerretDB/issues/2616 + Remove("ArrayDocuments") testCases := map[string]aggregateStagesCompatTestCase{ "GroupNullID": { @@ -1189,12 +1191,6 @@ func TestAggregateCompatSort(t *testing.T) { }}}}, }, - "DotNotation": { - pipeline: bson.A{bson.D{{"$sort", bson.D{ - {"v.foo", 1}, - {"_id", 1}, // sort by _id when v is the same. - }}}}, - }, "DotNotationIndex": { pipeline: bson.A{bson.D{{"$sort", bson.D{ {"v.0", 1}, @@ -1235,6 +1231,25 @@ func TestAggregateCompatSort(t *testing.T) { testAggregateStagesCompat(t, testCases) } +func TestAggregateCompatSortDotNotation(t *testing.T) { + t.Parallel() + + providers := shareddata.AllProviders(). + // TODO: https://github.com/FerretDB/FerretDB/issues/2617 + Remove("ArrayDocuments") + + testCases := map[string]aggregateStagesCompatTestCase{ + "DotNotation": { + pipeline: bson.A{bson.D{{"$sort", bson.D{ + {"v.foo", 1}, + {"_id", 1}, // sort by _id when v is the same. + }}}}, + }, + } + + testAggregateStagesCompatWithProviders(t, providers, testCases) +} + func TestAggregateCompatUnwind(t *testing.T) { t.Parallel() diff --git a/integration/query_compat_test.go b/integration/query_compat_test.go index deb81d23666b..720a0cfda650 100644 --- a/integration/query_compat_test.go +++ b/integration/query_compat_test.go @@ -254,10 +254,6 @@ func TestQueryCompatSort(t *testing.T) { resultType: emptyResult, }, - "DotNotation": { - filter: bson.D{}, - sort: bson.D{{"v.foo", 1}, {"_id", 1}}, - }, "DotNotationIndex": { filter: bson.D{}, sort: bson.D{{"v.0", 1}, {"_id", 1}}, @@ -296,6 +292,22 @@ func TestQueryCompatSort(t *testing.T) { testQueryCompat(t, testCases) } +func TestQueryCompatSortDotNotation(t *testing.T) { + t.Parallel() + + providers := shareddata.AllProviders(). + // TODO: https://github.com/FerretDB/FerretDB/issues/2618 + Remove("ArrayDocuments") + + testCases := map[string]queryCompatTestCase{ + "DotNotation": { + filter: bson.D{}, + sort: bson.D{{"v.foo", 1}, {"_id", 1}}, + }, + } + testQueryCompatWithProviders(t, providers, testCases) +} + func TestQueryCompatSkip(t *testing.T) { t.Parallel() diff --git a/integration/query_projection_compat_test.go b/integration/query_projection_compat_test.go index 27de972fb66e..80b8510513a6 100644 --- a/integration/query_projection_compat_test.go +++ b/integration/query_projection_compat_test.go @@ -75,6 +75,10 @@ func TestQueryProjectionCompat(t *testing.T) { filter: bson.D{}, projection: bson.D{{"foo", 1.24}, {"bar", true}}, }, + "Include2FieldsReverse": { + filter: bson.D{}, + projection: bson.D{{"bar", true}, {"foo", 1.24}}, + }, "Exclude2Fields": { filter: bson.D{}, projection: bson.D{{"foo", int32(0)}, {"bar", false}}, @@ -141,33 +145,89 @@ func TestQueryProjectionCompat(t *testing.T) { "DotNotationInclude": { filter: bson.D{}, projection: bson.D{{"v.foo", true}}, - skip: "https://github.com/FerretDB/FerretDB/issues/2430", }, "DotNotationIncludeTwo": { filter: bson.D{}, projection: bson.D{{"v.foo", true}, {"v.array", true}}, - skip: "https://github.com/FerretDB/FerretDB/issues/2430", + }, + "DotNotationIncludeTwoReverse": { + filter: bson.D{}, + projection: bson.D{{"v.array", true}, {"v.foo", true}}, + }, + "DotNotationIncludeTwoArray": { + filter: bson.D{}, + projection: bson.D{{"v.foo", true}, {"v.bar", true}}, }, "DotNotationExclude": { filter: bson.D{}, projection: bson.D{{"v.foo", false}}, - skip: "https://github.com/FerretDB/FerretDB/issues/2430", }, "DotNotationExcludeTwo": { filter: bson.D{}, projection: bson.D{{"v.foo", false}, {"v.array", false}}, - skip: "https://github.com/FerretDB/FerretDB/issues/2430", }, "DotNotationExcludeSecondLevel": { filter: bson.D{}, projection: bson.D{{"v.array.42", false}}, - skip: "https://github.com/FerretDB/FerretDB/issues/2430", + }, + "DotNotationIncludeSecondLevel": { + filter: bson.D{}, + projection: bson.D{{"v.array.42", true}}, }, "DotNotationIncludeExclude": { filter: bson.D{}, projection: bson.D{{"v.foo", true}, {"v.array", false}}, resultType: emptyResult, - skip: "https://github.com/FerretDB/FerretDB/issues/2430", + }, + "DotNotation5LevelInclude": { + filter: bson.D{}, + projection: bson.D{{"v.a.b.c.d", true}}, + }, + "DotNotation5LevelExclude": { + filter: bson.D{}, + projection: bson.D{{"v.a.b.c.d", false}}, + }, + "DotNotation4LevelInclude": { + filter: bson.D{}, + projection: bson.D{{"v.a.b.c", true}}, + }, + "DotNotation4LevelExclude": { + filter: bson.D{}, + projection: bson.D{{"v.a.b.c", false}}, + }, + "DotNotationArrayInclude": { + filter: bson.D{}, + projection: bson.D{{"v.array.0", true}}, + }, + "DotNotationArrayExclude": { + filter: bson.D{}, + projection: bson.D{{"v.array.0", false}}, + }, + "DotNotationArrayPathInclude": { + filter: bson.D{}, + projection: bson.D{{"v.0.foo", true}}, + }, + "DotNotationArrayPathExclude": { + filter: bson.D{}, + projection: bson.D{{"v.0.foo", false}}, + }, + "DotNotationManyInclude": { + filter: bson.D{}, + projection: bson.D{ + {"v.42", true}, + {"v.non-existent", true}, + {"v.foo", true}, + {"v.array", true}, + }, + }, + "DotNotationManyExclude": { + filter: bson.D{}, + projection: bson.D{ + {"v.42", false}, + {"v.non-existent", false}, + {"v.foo", false}, + {"v.array", false}, + }, }, } diff --git a/integration/shareddata/composites.go b/integration/shareddata/composites.go index 5f71f46d1336..a572087fef06 100644 --- a/integration/shareddata/composites.go +++ b/integration/shareddata/composites.go @@ -185,6 +185,38 @@ var DocumentsDocuments = &Values[primitive.ObjectID]{ }, } +// DocumentsDeeplyNested contains documents nested in multiple levels for tests. +var DocumentsDeeplyNested = &Values[string]{ + name: "DocumentsDeeplyNested", + backends: []string{"ferretdb-pg", "mongodb"}, + data: map[string]any{ + "two": bson.D{{"a", bson.D{{"b", 12}}}}, + "three": bson.D{{"a", bson.D{{"b", bson.D{{"c", 12}}}}}}, + "four": bson.D{ + {"a", bson.D{ + {"b", bson.D{ + {"c", bson.D{ + {"d", 123}, + }}, + {"e", 13}, + }}, + {"f", 14}, + }}, + {"g", 15}, + }, + "array": bson.D{ + {"a", bson.D{ + {"b", bson.D{ + {"c", bson.A{1, 2}}, + {"e", 13}, + }}, + {"f", 14}, + }}, + {"g", 15}, + }, + }, +} + // ArrayStrings contains an array with string values for tests. // Tigris JSON schema validator contains extra properties to make it suitable for more tests. var ArrayStrings = &Values[string]{ @@ -380,5 +412,19 @@ var ArrayDocuments = &Values[string]{ bson.A{bson.D{{"bar", "hello"}}}, }}, }, + "array-three-documents": bson.A{ + bson.D{{ + "bar", + bson.A{bson.D{{"a", "b"}}}, + }}, + bson.D{{ + "foo", + bson.A{bson.D{{"bar", "hello"}}}, + }}, + bson.D{{ + "foo", + bson.A{bson.D{{"bar", "hello"}}}, + }}, + }, }, } diff --git a/integration/shareddata/shareddata.go b/integration/shareddata/shareddata.go index 2f9b223f0997..54363b9c2570 100644 --- a/integration/shareddata/shareddata.go +++ b/integration/shareddata/shareddata.go @@ -51,6 +51,7 @@ func AllProviders() Providers { DocumentsDoubles, DocumentsStrings, DocumentsDocuments, + DocumentsDeeplyNested, ArrayStrings, ArrayDoubles, diff --git a/internal/handlers/common/projection.go b/internal/handlers/common/projection.go index b955ada3be3e..2e790ddf25f1 100644 --- a/internal/handlers/common/projection.go +++ b/internal/handlers/common/projection.go @@ -221,24 +221,21 @@ func projectDocumentWithoutID(doc *types.Document, projection *types.Document, i ) case *types.Array, string, types.Binary, types.ObjectID, - time.Time, types.NullType, types.Regex, types.Timestamp: // all this types are treated as new fields value + time.Time, types.NullType, types.Regex, types.Timestamp: // all these types are treated as new fields value projected.Set(key, value) case bool: // field: bool - // process top level fields - if path.Len() == 1 { - if inclusion { - if docWithoutID.Has(key) { - projected.Set(key, must.NotFail(docWithoutID.Get(key))) - } - - continue + if inclusion { + // inclusion projection copies the field on the path from docWithoutID to projected. + if _, err = includeProjection(path, docWithoutID, projected); err != nil { + return nil, err } - projected.Remove(key) + continue } - // TODO: process dot notation here https://github.com/FerretDB/FerretDB/issues/2430 + // exclusion projection removes the field on the path in projected. + excludeProjection(path, projected) default: return nil, lazyerrors.Errorf("unsupported operation %s %v (%T)", key, value, value) } @@ -246,3 +243,240 @@ func projectDocumentWithoutID(doc *types.Document, projection *types.Document, i return projected, nil } + +// includeProjection copies the field on the path from source to projected. +// When an array is on the path, it returns the array containing any document +// with the same key. Dot notation with array index path does not include +// the field unlike document.SetByPath(path). +// Inclusion projection with non-existent path creates an empty document +// or an empty array based on what source has. +// It returns iterator errors other than ErrIteratorDone. +// If the projected contains field that is not expected in source, it panics. +// +// Example: "v.foo" path inclusion projection: +// {v: {foo: 1, bar: 1}} -> {v: {foo: 1}} +// {v: {bar: 1}} -> {v: {}} +// {v: [{bar: 1}]} -> {v: [{}]} +// {v: [{foo: 1}, {foo: 2}, {bar: 1}]} -> {v: [{foo: 1}, {foo: 2}, {}]} +// +// Example: "v.0.foo" path inclusion projection: +// {v: [{foo: 1}, {foo: 2}, {bar: 1}]} -> {v: [{}, {}, {}]} +func includeProjection(path types.Path, source any, projected *types.Document) (*types.Array, error) { + key := path.Prefix() + + switch source := source.(type) { + case *types.Document: + embeddedSource, err := source.Get(key) + if err != nil { + // key does not exist, nothing to set. + return nil, nil + } + + if path.Len() <= 1 { + // path reached suffix, set field in projected. + setBySourceOrder(key, embeddedSource, source, projected) + return nil, nil + } + + doc := new(types.Document) + + if projected.Has(key) { + // set doc if projected has field from other projection field. + v := must.NotFail(projected.Get(key)) + if d, ok := v.(*types.Document); ok { + doc = d + } + + if arr, ok := v.(*types.Array); ok { + // use next prefix key with arr value, allowing array to parse existing + // projection fields. + doc = must.NotFail(types.NewDocument(path.TrimPrefix().Prefix(), arr)) + } + } + + // when next prefix has an array use returned value arr, + // if it has a document, field in the doc is set by includeProjection. + arr, err := includeProjection(path.TrimPrefix(), embeddedSource, doc) + if err != nil { + return nil, err + } + + switch embeddedSource.(type) { + case *types.Document: + setBySourceOrder(key, doc, source, projected) + case *types.Array: + projected.Set(key, arr) + } + + return nil, nil + case *types.Array: + iter := source.Iterator() + defer iter.Close() + + arr := new(types.Array) + var inclusionExists bool + + if v, err := projected.Get(key); err == nil { + projectedArr, ok := v.(*types.Array) + if ok { + arr = projectedArr + inclusionExists = true + } + } + + i := 0 + + for { + _, arrElem, err := iter.Next() + if err != nil { + if errors.Is(err, iterator.ErrIteratorDone) { + break + } + + return nil, lazyerrors.Error(err) + } + + if _, ok := arrElem.(*types.Document); !ok { + continue + } + + doc := new(types.Document) + + if inclusionExists { + // when there are multiple inclusion fields, first inclusion + // inserts all documents from source to arr, they could be empty + // if it did not match previous inclusion fields. + // But number of documents in arr must be the same as number of documents + // in source. + var v any + + v, err = arr.Get(i) + if err != nil { + panic(err) + } + + docVal, ok := v.(*types.Document) + if !ok { + panic("projected field must be a document") + } + + doc = docVal + } else { + // first inclusion field, insert it to the doc. + arr.Append(doc) + } + + if _, err = includeProjection(path, arrElem, doc); err != nil { + return nil, err + } + + arr.Set(i, doc) + i++ + } + + return arr, nil + default: + // field is not a document or an array, nothing to set. + return nil, nil + } +} + +// excludeProjection removes the field on the path in projected. +// When an array is on the path, it checks if the array contains any document +// with the key to remove that document. This is not the case in document.Remove(key). +// Dot notation with array index path do not exclude unlike document.RemoveByPath(key). +// +// Examples: "v.foo" path exclusion projection: +// {v: {foo: 1}} -> {v: {}} +// {v: {foo: 1, bar: 1}} -> {v: {bar: 1}} +// {v: [{foo: 1}, {foo: 2}]} -> {v: [{}, {}]} +// {v: [{foo: 1}, {foo: 2}, {bar: 1}]} -> {v: [{}, {}, {bar: 1}]} +// +// Example: "v.0.foo" path exclusion projection: +// {v: [{foo: 1}, {foo: 2}]} -> {v: [{foo: 1}, {foo: 2}]} +func excludeProjection(path types.Path, projected any) { + key := path.Prefix() + + switch projected := projected.(type) { + case *types.Document: + embeddedSource, err := projected.Get(key) + if err != nil { + // key does not exist, nothing to exclude. + return + } + + if path.Len() <= 1 { + // path reached suffix, remove the field from the document. + projected.Remove(key) + return + } + + // recursively remove field from the embeddedSource. + excludeProjection(path.TrimPrefix(), embeddedSource) + + return + case *types.Array: + // modifies the field of projected, hence not using iterator. + for i := 0; i < projected.Len(); i++ { + arrElem := must.NotFail(projected.Get(i)) + + if _, ok := arrElem.(*types.Document); !ok { + // not a document, cannot possibly be part of path, do nothing. + continue + } + + excludeProjection(path, arrElem) + } + + return + default: + // not a path, nothing to exclude. + return + } +} + +// setBySourceOrder sets the key value field to projected in same field order as the source. +// Example: +// +// key: foo +// val: 1 +// source: {foo: 1, bar: 2} +// projected: {bar: 2} +// +// setBySourceOrder sets projected to {foo: 1, bar: 2} rather than adding it to the last field. +func setBySourceOrder(key string, val any, source, projected *types.Document) { + projectedKeys := projected.Keys() + + // newFieldIndex is where new field is to be inserted in projected document. + newFieldIndex := 0 + + for _, sourceKey := range source.Keys() { + if sourceKey == key { + break + } + + if newFieldIndex >= len(projectedKeys) { + break + } + + if sourceKey == projectedKeys[newFieldIndex] { + newFieldIndex++ + } + } + + tmp := projected.DeepCopy() + + // remove fields of projected from newFieldIndex to the end + for i := newFieldIndex; i < len(projectedKeys); i++ { + projected.Remove(projectedKeys[i]) + } + + projected.Set(key, val) + + // copy newFieldIndex-th to the end from tmp to projected + i := newFieldIndex + for _, key := range tmp.Keys()[newFieldIndex:] { + projected.Set(key, must.NotFail(tmp.Get(tmp.Keys()[i]))) + i++ + } +}