From 1643dc3a8bdf7746400a84ac3a053f0129c12c4b Mon Sep 17 00:00:00 2001 From: Thejas-bhat <35959007+Thejas-bhat@users.noreply.github.com> Date: Fri, 11 Nov 2022 22:11:53 +0530 Subject: [PATCH] Returning bytesRead as part of searchResult (#1752) * refactoring the bytes read while querying * bug fix: incrementing the bytes read at the right time * bug fix: fixing doc value and stored fields section bytes read computation * - updating stored fields' bytes read logic - bug fixes with respect to bytes read tracking in collector, tfr and docValueReader - fixing boolean searcher's BytesRead() API * - updating TestBytesRead and TestBytesReadStored with numbers after the bug fixes. * - updating the searcher interface and implementations with a SetBytesRead() API. Currently the only valid implementations are with disjunct and conjunct searchers. - introducing BytesRead for newSnapshotIndexFieldDict. This is necessary for fuzzy, regex-like queries. - updating the tests with the changes - updating rest of interface dependencies with no-op APIs in their implementations * - accounting the bytes read for prefix, range, regexp queries in the searcher - minor code refactor with respect to tfr * a huge experimental change by introducing the concept of context right at the searcher level. this will essentially help in bytesRead tracking as well. * - using the IOstats reporter callback in term field reader and reporting the same as part of search result. - and yes, it works :D - removed the BytesRead and SetBytesRead API from searcher * - bug fixes with respect to bytes read tracking in regexp queries - fixed a race condition around the newSnapshotIndexFieldDict - using the callbacks to return the bytes read value with respect to fieldDict in regexp, fuzzy etc type of queries, where there is an index.FieldDict creation involved (which loads the termDictionary for that field) - updated the tests reflecting the bug fixes. 
- removing the BytesRead API for index.TermFieldreader * - including the review comments and minor code cleanup * - addressing review comments, minor code cleanup - removing bytesRead from search.DocumentMatch * bug fixes with respect to reporting of the bytesRead * updating test plans * updating go.mod and go.sum * added some comments * minor renaming of function name --- document/document.go | 11 ++- go.mod | 14 +-- go.sum | 28 +++--- index/scorch/builder_test.go | 2 +- index/scorch/reader_test.go | 14 ++- index/scorch/scorch_test.go | 4 +- index/scorch/snapshot_index.go | 61 +++++++----- index/scorch/snapshot_index_dict.go | 11 ++- index/scorch/snapshot_index_tfr.go | 21 +++- index/upsidedown/field_dict.go | 4 + index/upsidedown/index_reader.go | 5 +- index/upsidedown/reader_test.go | 10 +- index/upsidedown/upsidedown_test.go | 2 +- index_alias_impl.go | 4 + index_impl.go | 45 ++++++--- index_test.go | 89 ++++++++--------- search.go | 16 +-- search/collector/search_test.go | 14 ++- search/collector/topn.go | 16 +-- search/query/bool_field.go | 6 +- search/query/boolean.go | 13 +-- search/query/conjunction.go | 7 +- search/query/date_range.go | 5 +- search/query/disjunction.go | 7 +- search/query/docid.go | 6 +- search/query/fuzzy.go | 6 +- search/query/geo_boundingbox.go | 11 ++- search/query/geo_boundingpolygon.go | 5 +- search/query/geo_distance.go | 5 +- search/query/geo_shape.go | 5 +- search/query/ip_range.go | 7 +- search/query/match.go | 9 +- search/query/match_all.go | 5 +- search/query/match_none.go | 3 +- search/query/match_phrase.go | 7 +- search/query/multi_phrase.go | 5 +- search/query/numeric_range.go | 5 +- search/query/phrase.go | 5 +- search/query/prefix.go | 6 +- search/query/query.go | 3 +- search/query/query_string.go | 6 +- search/query/regexp.go | 5 +- search/query/term.go | 6 +- search/query/term_range.go | 5 +- search/query/wildcard.go | 5 +- search/scorer/scorer_conjunction.go | 9 +- search/scorer/scorer_disjunction.go | 2 +- 
search/scorer/scorer_term.go | 1 - search/search.go | 6 +- search/searcher/search_boolean.go | 3 +- search/searcher/search_boolean_test.go | 98 +++++++++---------- search/searcher/search_conjunction.go | 8 +- search/searcher/search_conjunction_test.go | 46 ++++----- search/searcher/search_disjunction.go | 15 +-- search/searcher/search_disjunction_heap.go | 5 +- search/searcher/search_disjunction_slice.go | 4 +- search/searcher/search_disjunction_test.go | 30 +++--- search/searcher/search_docid.go | 3 +- search/searcher/search_docid_test.go | 2 +- search/searcher/search_filter.go | 3 +- search/searcher/search_fuzzy.go | 54 +++++++--- search/searcher/search_fuzzy_test.go | 12 +-- search/searcher/search_geoboundingbox.go | 26 ++--- search/searcher/search_geoboundingbox_test.go | 6 +- search/searcher/search_geopointdistance.go | 20 ++-- .../searcher/search_geopointdistance_test.go | 4 +- search/searcher/search_geopolygon.go | 9 +- search/searcher/search_geopolygon_test.go | 2 +- search/searcher/search_geoshape.go | 7 +- .../searcher/search_geoshape_circle_test.go | 2 +- .../searcher/search_geoshape_envelope_test.go | 2 +- ...search_geoshape_geometrycollection_test.go | 2 +- .../search_geoshape_linestring_test.go | 2 +- .../searcher/search_geoshape_points_test.go | 2 +- .../searcher/search_geoshape_polygon_test.go | 4 +- search/searcher/search_ip_range.go | 5 +- search/searcher/search_match_all.go | 4 +- search/searcher/search_match_all_test.go | 4 +- search/searcher/search_multi_term.go | 42 ++++---- search/searcher/search_numeric_range.go | 24 ++++- search/searcher/search_phrase.go | 15 +-- search/searcher/search_phrase_test.go | 4 +- search/searcher/search_regexp.go | 43 +++++--- search/searcher/search_regexp_test.go | 4 +- search/searcher/search_term.go | 9 +- search/searcher/search_term_prefix.go | 10 +- search/searcher/search_term_range.go | 10 +- search/searcher/search_term_range_test.go | 7 +- search/searcher/search_term_test.go | 2 +- test/versus_test.go | 6 +- 
90 files changed, 657 insertions(+), 450 deletions(-) diff --git a/document/document.go b/document/document.go index 1a6050f0a..54fd6d442 100644 --- a/document/document.go +++ b/document/document.go @@ -30,9 +30,14 @@ func init() { } type Document struct { - id string `json:"id"` - Fields []Field `json:"fields"` - CompositeFields []*CompositeField + id string `json:"id"` + Fields []Field `json:"fields"` + CompositeFields []*CompositeField + StoredFieldsSize uint64 +} + +func (d *Document) StoredFieldsBytes() uint64 { + return d.StoredFieldsSize } func NewDocument(id string) *Document { diff --git a/go.mod b/go.mod index e4e74dee3..6772f4fa2 100644 --- a/go.mod +++ b/go.mod @@ -5,23 +5,23 @@ go 1.18 require ( github.com/RoaringBitmap/roaring v0.9.4 github.com/bits-and-blooms/bitset v1.2.0 - github.com/blevesearch/bleve_index_api v1.0.4 + github.com/blevesearch/bleve_index_api v1.0.5 github.com/blevesearch/geo v0.1.15 github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 github.com/blevesearch/gtreap v0.1.1 - github.com/blevesearch/scorch_segment_api/v2 v2.1.3 + github.com/blevesearch/scorch_segment_api/v2 v2.1.4 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowball v0.6.1 github.com/blevesearch/snowballstem v0.9.0 github.com/blevesearch/upsidedown_store_api v1.0.1 github.com/blevesearch/vellum v1.0.9 - github.com/blevesearch/zapx/v11 v11.3.6 - github.com/blevesearch/zapx/v12 v12.3.6 - github.com/blevesearch/zapx/v13 v13.3.6 - github.com/blevesearch/zapx/v14 v14.3.6 - github.com/blevesearch/zapx/v15 v15.3.6 + github.com/blevesearch/zapx/v11 v11.3.7 + github.com/blevesearch/zapx/v12 v12.3.7 + github.com/blevesearch/zapx/v13 v13.3.7 + github.com/blevesearch/zapx/v14 v14.3.7 + github.com/blevesearch/zapx/v15 v15.3.7 github.com/couchbase/moss v0.2.0 github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v1.4.0 diff --git a/go.sum b/go.sum index 
3c3c6c99c..4f1b2e36d 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/blevesearch/bleve_index_api v1.0.3/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4= -github.com/blevesearch/bleve_index_api v1.0.4 h1:mtlzsyJjMIlDngqqB1mq8kPryUMIuEVVbRbJHOWEexU= -github.com/blevesearch/bleve_index_api v1.0.4/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= +github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw= +github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= github.com/blevesearch/geo v0.1.15 h1:0NybEduqE5fduFRYiUKF0uqybAIFKXYjkBdXKYn7oA4= github.com/blevesearch/geo v0.1.15/go.mod h1:cRIvqCdk3cgMhGeHNNe6yPzb+w56otxbfo1FBJfR2Pc= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA= @@ -18,8 +18,8 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= -github.com/blevesearch/scorch_segment_api/v2 v2.1.3 h1:2UzpR2dR5DvSZk8tVJkcQ7D5xhoK/UBelYw8ttBHrRQ= -github.com/blevesearch/scorch_segment_api/v2 v2.1.3/go.mod h1:eZrfp1y+lUh+DzFjUcTBUSnKGuunyFIpBIvqYVzJfvc= +github.com/blevesearch/scorch_segment_api/v2 v2.1.4 h1:LmGmo5twU3gV+natJbKmOktS9eMhokPGKWuR+jX84vk= +github.com/blevesearch/scorch_segment_api/v2 v2.1.4/go.mod h1:PgVnbbg/t1UkgezPDu8EHLi1BHQ17xUwsFdU6NnOYS0= github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac= 
github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ= github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A= @@ -30,16 +30,16 @@ github.com/blevesearch/upsidedown_store_api v1.0.1 h1:1SYRwyoFLwG3sj0ed89RLtM15a github.com/blevesearch/upsidedown_store_api v1.0.1/go.mod h1:MQDVGpHZrpe3Uy26zJBf/a8h0FZY6xJbthIMm8myH2Q= github.com/blevesearch/vellum v1.0.9 h1:PL+NWVk3dDGPCV0hoDu9XLLJgqU4E5s/dOeEJByQ2uQ= github.com/blevesearch/vellum v1.0.9/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k= -github.com/blevesearch/zapx/v11 v11.3.6 h1:50jET4HUJ6eCqGxdhUt+mjybMvEX2MWyqLGtCx3yUgc= -github.com/blevesearch/zapx/v11 v11.3.6/go.mod h1:B0CzJRj/pS7hJIroflRtFsa9mRHpMSucSgre0FVINns= -github.com/blevesearch/zapx/v12 v12.3.6 h1:G304NHBLgQeZ+IHK/XRCM0nhHqAts8MEvHI6LhoDNM4= -github.com/blevesearch/zapx/v12 v12.3.6/go.mod h1:iYi7tIKpauwU5os5wTxJITixr5Km21Hl365otMwdaP0= -github.com/blevesearch/zapx/v13 v13.3.6 h1:vavltQHNdjQezhLZs5nIakf+w/uOa1oqZxB58Jy/3Ig= -github.com/blevesearch/zapx/v13 v13.3.6/go.mod h1:X+FsTwCU8qOHtK0d/ArvbOH7qiIgViSQ1GQvcR6LSkI= -github.com/blevesearch/zapx/v14 v14.3.6 h1:b9lub7TvcwUyJxK/cQtnN79abngKxsI7zMZnICU0WhE= -github.com/blevesearch/zapx/v14 v14.3.6/go.mod h1:9X8W3XoikagU0rwcTqwZho7p9cC7m7zhPZO94S4wUvM= -github.com/blevesearch/zapx/v15 v15.3.6 h1:VSswg/ysDxHgitcNkpUNtaTYS4j3uItpXWLAASphl6k= -github.com/blevesearch/zapx/v15 v15.3.6/go.mod h1:5DbhhDTGtuQSns1tS2aJxJLPc91boXCvjOMeCLD1saM= +github.com/blevesearch/zapx/v11 v11.3.7 h1:Y6yIAF/DVPiqZUA/jNgSLXmqewfzwHzuwfKyfdG+Xaw= +github.com/blevesearch/zapx/v11 v11.3.7/go.mod h1:Xk9Z69AoAWIOvWudNDMlxJDqSYGf90LS0EfnaAIvXCA= +github.com/blevesearch/zapx/v12 v12.3.7 h1:DfQ6rsmZfEK4PzzJJRXjiM6AObG02+HWvprlXQ1Y7eI= +github.com/blevesearch/zapx/v12 v12.3.7/go.mod h1:SgEtYIBGvM0mgIBn2/tQE/5SdrPXaJUaT/kVqpAPxm0= +github.com/blevesearch/zapx/v13 v13.3.7 h1:igIQg5eKmjw168I7av0Vtwedf7kHnQro/M+ubM4d2l8= +github.com/blevesearch/zapx/v13 v13.3.7/go.mod 
h1:yyrB4kJ0OT75UPZwT/zS+Ru0/jYKorCOOSY5dBzAy+s= +github.com/blevesearch/zapx/v14 v14.3.7 h1:gfe+fbWslDWP/evHLtp/GOvmNM3sw1BbqD7LhycBX20= +github.com/blevesearch/zapx/v14 v14.3.7/go.mod h1:9J/RbOkqZ1KSjmkOes03AkETX7hrXT0sFMpWH4ewC4w= +github.com/blevesearch/zapx/v15 v15.3.7 h1:r8ZcNrlcMj2TmLlbNH16wZiL9reU0s7C2rAQKjFDtuE= +github.com/blevesearch/zapx/v15 v15.3.7/go.mod h1:m7Y6m8soYUvS7MjN9eKlz1xrLCcmqfFadmu7GhWIrLY= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= diff --git a/index/scorch/builder_test.go b/index/scorch/builder_test.go index aaff060e9..82309e38c 100644 --- a/index/scorch/builder_test.go +++ b/index/scorch/builder_test.go @@ -102,7 +102,7 @@ func checkIndex(t *testing.T, path string, term []byte, field string, expectCoun } // run a search for hello - tfr, err := r.TermFieldReader(term, field, false, false, false) + tfr, err := r.TermFieldReader(nil, term, field, false, false, false) if err != nil { t.Errorf("error accessing term field reader: %v", err) } else { diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index 68c7a3e57..0eebd92b9 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -82,7 +82,7 @@ func TestIndexReader(t *testing.T) { }() // first look for a term that doesn't exist - reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true) + reader, err := indexReader.TermFieldReader(nil, []byte("nope"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } @@ -95,7 +95,7 @@ func TestIndexReader(t *testing.T) { t.Fatal(err) } - reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) + reader, err = indexReader.TermFieldReader(nil, []byte("test"), "name", true, true, true) if err != nil { 
t.Errorf("Error accessing term field reader: %v", err) } @@ -145,7 +145,7 @@ func TestIndexReader(t *testing.T) { }, }, } - tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true) + tfr, err := indexReader.TermFieldReader(nil, []byte("rice"), "desc", true, true, true) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -153,9 +153,7 @@ func TestIndexReader(t *testing.T) { if err != nil { t.Errorf("unexpected error: %v", err) } - // Ignoring the BytesRead value, since it doesn't have - // relevance in this type of test - match.BytesRead = 0 + if !reflect.DeepEqual(expectedMatch, match) { t.Errorf("got %#v, expected %#v", match, expectedMatch) } @@ -165,7 +163,7 @@ func TestIndexReader(t *testing.T) { } // now test usage of advance - reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) + reader, err = indexReader.TermFieldReader(nil, []byte("test"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } @@ -196,7 +194,7 @@ func TestIndexReader(t *testing.T) { } // now test creating a reader for a field that doesn't exist - reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true) + reader, err = indexReader.TermFieldReader(nil, []byte("water"), "doesnotexist", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index fe85cc9e1..8c0d6ccf3 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1528,7 +1528,7 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } }() - termFieldReader, err := indexReader.TermFieldReader([]byte("mister"), "_all", true, true, true) + termFieldReader, err := indexReader.TermFieldReader(nil, []byte("mister"), "_all", true, true, true) if err != nil { t.Error(err) } @@ -2480,7 +2480,7 @@ func TestIndexSeekBackwardsStats(t *testing.T) { } defer reader.Close() - 
tfr, err := reader.TermFieldReader([]byte("cat"), "name", false, false, false) + tfr, err := reader.TermFieldReader(nil, []byte("cat"), "name", false, false, false) if err != nil { t.Fatalf("error getting term field readyer for name/cat: %v", err) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5db501a88..c0f3c7ac0 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -16,6 +16,7 @@ package scorch import ( "container/heap" + "context" "encoding/binary" "fmt" "os" @@ -37,6 +38,7 @@ import ( // re usable, threadsafe levenshtein builders var lb1, lb2 *lev.LevenshteinAutomatonBuilder +type diskStatsReporter segment.DiskStatsReporter type asynchSegmentResult struct { dict segment.TermDictionary dictItr segment.DictionaryIterator @@ -144,18 +146,17 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, randomLookup bool) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) + var totalBytesRead uint64 for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - var prevBytesRead uint64 - prevBytesRead = segment.segment.BytesRead() - dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { - atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, - segment.segment.BytesRead()-prevBytesRead) - + if dictStats, ok := dict.(diskStatsReporter); ok { + atomic.AddUint64(&totalBytesRead, + dictStats.BytesRead()) + } if randomLookup { results <- &asynchSegmentResult{dict: dict} } else { @@ -193,6 +194,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, } } } + rv.bytesRead = totalBytesRead // after ensuring we've read all items on channel if err != nil { return nil, err @@ -403,7 +405,7 @@ func (i *IndexSnapshot) DocCount() (uint64, error) { func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { // FIXME could be done more efficiently directly, but reusing 
for simplicity - tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) + tfr, err := i.TermFieldReader(nil, []byte(id), "_id", false, false, false) if err != nil { return nil, err } @@ -430,13 +432,17 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) rvd := document.NewDocument(id) - prevBytesRead := i.segment[segmentIndex].segment.BytesRead() err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { if name == "_id" { return true } + // track uncompressed stored fields bytes as part of IO stats. + // However, ideally we'd need to track the compressed on-disk value + // Keeping that TODO for now until we have a cleaner way. + rvd.StoredFieldsSize += uint64(len(val)) + // copy value, array positions to preserve them beyond the scope of this callback value := append([]byte(nil), val...) arrayPos := append([]uint64(nil), pos...) 
@@ -464,9 +470,6 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { return nil, err } - if delta := i.segment[segmentIndex].segment.BytesRead() - prevBytesRead; delta > 0 { - atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, delta) - } return rvd, nil } @@ -500,7 +503,7 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) { // FIXME could be done more efficiently directly, but reusing for simplicity - tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) + tfr, err := i.TermFieldReader(nil, []byte(id), "_id", false, false, false) if err != nil { return nil, err } @@ -518,10 +521,11 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err return next.ID, nil } -func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, +func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { rv := is.allocTermFieldReaderDicts(field) + rv.ctx = ctx rv.term = term rv.field = field rv.snapshot = is @@ -541,13 +545,15 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if rv.dicts == nil { rv.dicts = make([]segment.TermDictionary, len(is.segment)) for i, segment := range is.segment { - prevBytesRead := segment.segment.BytesRead() + segBytesRead := segment.segment.BytesRead() + rv.incrementBytesRead(segBytesRead) dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } - if bytesRead := segment.segment.BytesRead(); bytesRead > prevBytesRead { - atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, bytesRead-prevBytesRead) + if dictStats, ok := dict.(diskStatsReporter); ok { + bytesRead := dictStats.BytesRead() + rv.incrementBytesRead(bytesRead) } rv.dicts[i] = dict } @@ -571,13 +577,11 @@ func (is 
*IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) if bytesRead := rv.postings[i].BytesRead(); prevBytesReadPL < bytesRead { - atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, - bytesRead-prevBytesReadPL) + rv.incrementBytesRead(bytesRead - prevBytesReadPL) } if bytesRead := rv.iterators[i].BytesRead(); prevBytesReadItr < bytesRead { - atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, - bytesRead-prevBytesReadItr) + rv.incrementBytesRead(bytesRead - prevBytesReadItr) } } atomic.AddUint64(&is.parent.stats.TotTermSearchersStarted, uint64(1)) @@ -634,6 +638,7 @@ func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} } if uint64(len(i.fieldTFRs[tfr.field])) < i.getFieldTFRCacheThreshold() { + tfr.bytesRead = 0 i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) } i.m2.Unlock() @@ -697,14 +702,10 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( } if ssvOk && ssv != nil && len(vFields) > 0 { - prevBytesRead := ss.segment.BytesRead() dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) if err != nil { return nil, nil, err } - if delta := ss.segment.BytesRead() - prevBytesRead; delta > 0 { - atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, delta) - } } if errCh != nil { @@ -733,6 +734,13 @@ type DocValueReader struct { currSegmentIndex int currCachedFields []string + + totalBytesRead uint64 + bytesRead uint64 +} + +func (dvr *DocValueReader) BytesRead() uint64 { + return dvr.totalBytesRead + dvr.bytesRead } func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, @@ -750,11 +758,16 @@ func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, if dvr.currSegmentIndex != segmentIndex { dvr.currSegmentIndex = segmentIndex dvr.currCachedFields = nil + dvr.totalBytesRead += dvr.bytesRead + dvr.bytesRead 
= 0 } dvr.currCachedFields, dvr.dvs, err = dvr.i.documentVisitFieldTermsOnSegment( dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields, visitor, dvr.dvs) + if dvr.dvs != nil { + dvr.bytesRead = dvr.dvs.BytesRead() + } return err } diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 0a2bd232e..658aa8148 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -28,9 +28,14 @@ type segmentDictCursor struct { } type IndexSnapshotFieldDict struct { - snapshot *IndexSnapshot - cursors []*segmentDictCursor - entry index.DictEntry + snapshot *IndexSnapshot + cursors []*segmentDictCursor + entry index.DictEntry + bytesRead uint64 +} + +func (i *IndexSnapshotFieldDict) BytesRead() uint64 { + return i.bytesRead } func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index bd20eb2e6..349620c71 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -16,10 +16,12 @@ package scorch import ( "bytes" + "context" "fmt" "reflect" "sync/atomic" + "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/size" index "github.com/blevesearch/bleve_index_api" segment "github.com/blevesearch/scorch_segment_api/v2" @@ -46,6 +48,12 @@ type IndexSnapshotTermFieldReader struct { currPosting segment.Posting currID index.IndexInternalID recycle bool + bytesRead uint64 + ctx context.Context +} + +func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) { + i.bytesRead += val } func (i *IndexSnapshotTermFieldReader) Size() int { @@ -95,7 +103,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // and they have to be added together before sending the bytesRead at this point // upstream. 
if delta := i.iterators[i.segmentOffset].BytesRead() - prevBytesRead; delta > 0 { - rv.BytesRead = delta + i.incrementBytesRead(delta) } return rv, nil @@ -138,7 +146,7 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo // FIXME do something better // for now, if we need to seek backwards, then restart from the beginning if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { - i2, err := i.snapshot.TermFieldReader(i.term, i.field, + i2, err := i.snapshot.TermFieldReader(nil, i.term, i.field, i.includeFreq, i.includeNorm, i.includeTermVectors) if err != nil { return nil, err @@ -189,6 +197,15 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { } func (i *IndexSnapshotTermFieldReader) Close() error { + if i.ctx != nil { + statsCallbackFn := i.ctx.Value(search.SearchIOStatsCallbackKey) + if statsCallbackFn != nil { + // essentially before you close the TFR, you must report this + // reader's bytesRead value + statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead) + } + } + if i.snapshot != nil { atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) i.snapshot.recycleTermFieldReader(i) diff --git a/index/upsidedown/field_dict.go b/index/upsidedown/field_dict.go index c4be57740..4875680c9 100644 --- a/index/upsidedown/field_dict.go +++ b/index/upsidedown/field_dict.go @@ -51,6 +51,10 @@ func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTe } +func (r *UpsideDownCouchFieldDict) BytesRead() uint64 { + return 0 +} + func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { key, val, valid := r.iterator.Current() if !valid { diff --git a/index/upsidedown/index_reader.go b/index/upsidedown/index_reader.go index ff0986d57..5c164fc8c 100644 --- a/index/upsidedown/index_reader.go +++ b/index/upsidedown/index_reader.go @@ -15,6 +15,7 @@ package upsidedown import ( + "context" "reflect" "github.com/blevesearch/bleve/v2/document" @@ -35,7 +36,7 @@ type 
IndexReader struct { docCount uint64 } -func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { +func (i *IndexReader) TermFieldReader(ctx context.Context, term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) if fieldExists { return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors) @@ -223,3 +224,5 @@ func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocValueVisitor) error { return dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor) } + +func (dvr *DocValueReader) BytesRead() uint64 { return 0 } diff --git a/index/upsidedown/reader_test.go b/index/upsidedown/reader_test.go index 9aac40a38..933723c9d 100644 --- a/index/upsidedown/reader_test.go +++ b/index/upsidedown/reader_test.go @@ -77,7 +77,7 @@ func TestIndexReader(t *testing.T) { }() // first look for a term that doesn't exist - reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true) + reader, err := indexReader.TermFieldReader(nil, []byte("nope"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } @@ -90,7 +90,7 @@ func TestIndexReader(t *testing.T) { t.Fatal(err) } - reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) + reader, err = indexReader.TermFieldReader(nil, []byte("test"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } @@ -128,7 +128,7 @@ func TestIndexReader(t *testing.T) { }, }, } - tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true) + tfr, err := indexReader.TermFieldReader(nil, []byte("rice"), "desc", true, true, true) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -145,7 
+145,7 @@ func TestIndexReader(t *testing.T) { } // now test usage of advance - reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) + reader, err = indexReader.TermFieldReader(nil, []byte("test"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } @@ -173,7 +173,7 @@ func TestIndexReader(t *testing.T) { } // now test creating a reader for a field that doesn't exist - reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true) + reader, err = indexReader.TermFieldReader(nil, []byte("water"), "doesnotexist", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } diff --git a/index/upsidedown/upsidedown_test.go b/index/upsidedown/upsidedown_test.go index a4b56e719..9b9ab9091 100644 --- a/index/upsidedown/upsidedown_test.go +++ b/index/upsidedown/upsidedown_test.go @@ -1239,7 +1239,7 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } }() - termFieldReader, err := indexReader.TermFieldReader([]byte("mister"), "_all", true, true, true) + termFieldReader, err := indexReader.TermFieldReader(nil, []byte("mister"), "_all", true, true, true) if err != nil { t.Error(err) } diff --git a/index_alias_impl.go b/index_alias_impl.go index 5a4dc5a48..a73dd6b8f 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -602,6 +602,10 @@ type indexAliasImplFieldDict struct { fieldDict index.FieldDict } +func (f *indexAliasImplFieldDict) BytesRead() uint64 { + return f.fieldDict.BytesRead() +} + func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) { return f.fieldDict.Next() } diff --git a/index_impl.go b/index_impl.go index 407f1ff5b..c5a0c46f4 100644 --- a/index_impl.go +++ b/index_impl.go @@ -469,7 +469,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } }() - searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ + // This callback and variable 
handles the tracking of bytes read + // 1. as part of creation of tfr and its Next() calls which is + // accounted by invoking this callback when the TFR is closed. + // 2. the docvalues portion (accounted in collector) and the retrieval + // of stored fields bytes (by LoadAndHighlightFields) + var totalBytesRead uint64 + sendBytesRead := func(bytesRead uint64) { + totalBytesRead += bytesRead + } + + ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, + search.SearchIOStatsCallbackFunc(sendBytesRead)) + + searcher, err := req.Query.Searcher(ctx, indexReader, i.m, search.SearcherOptions{ Explain: req.Explain, IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, Score: req.Score, @@ -481,6 +494,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if serr := searcher.Close(); err == nil && serr != nil { err = serr } + if sr != nil { + sr.BytesRead = totalBytesRead + } + if sr, ok := indexReader.(*scorch.IndexSnapshot); ok { + sr.UpdateIOStats(totalBytesRead) + } }() if req.Facets != nil { @@ -528,21 +547,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr }() } } - var totalBytesRead uint64 - SendBytesRead := func(bytesRead uint64) { - totalBytesRead = bytesRead - } - ctx = context.WithValue(ctx, collector.SearchIOStatsCallbackKey, - collector.SearchIOStatsCallbackFunc(SendBytesRead)) err = coll.Collect(ctx, searcher, indexReader) if err != nil { return nil, err } - if sr, ok := indexReader.(*scorch.IndexSnapshot); ok { - sr.UpdateIOStats(totalBytesRead) - } hits := coll.Results() var highlighter highlight.Highlighter @@ -568,10 +578,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if i.name != "" { hit.Index = i.name } - err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) + err, storedFieldsBytes := LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) if err != nil { return nil, err } + totalBytesRead += 
storedFieldsBytes } atomic.AddUint64(&i.stats.searches, 1) @@ -610,9 +621,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, indexName string, r index.IndexReader, - highlighter highlight.Highlighter) error { + highlighter highlight.Highlighter) (error, uint64) { + var totalStoredFieldsBytes uint64 if len(req.Fields) > 0 || highlighter != nil { doc, err := r.Document(hit.ID) + totalStoredFieldsBytes = doc.StoredFieldsBytes() if err == nil && doc != nil { if len(req.Fields) > 0 { fieldsToLoad := deDuplicate(req.Fields) @@ -676,11 +689,11 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, } else if doc == nil { // unexpected case, a doc ID that was found as a search hit // was unable to be found during document lookup - return ErrorIndexReadInconsistency + return ErrorIndexReadInconsistency, 0 } } - return nil + return nil, totalStoredFieldsBytes } // Fields returns the name of all the fields this @@ -880,6 +893,10 @@ type indexImplFieldDict struct { fieldDict index.FieldDict } +func (f *indexImplFieldDict) BytesRead() uint64 { + return f.fieldDict.BytesRead() +} + func (f *indexImplFieldDict) Next() (*index.DictEntry, error) { return f.fieldDict.Next() } diff --git a/index_test.go b/index_test.go index a7914ee63..933894204 100644 --- a/index_test.go +++ b/index_test.go @@ -237,7 +237,7 @@ func approxSame(actual, expected uint64) bool { return b - a } - return float64(modulus(actual, expected))/float64(expected) < float64(0.25) + return float64(modulus(actual, expected))/float64(expected) < float64(0.30) } func checkStatsOnIndexedBatch(indexPath string, indexMapping mapping.IndexMapping, @@ -395,27 +395,27 @@ func TestBytesRead(t *testing.T) { query := NewQueryStringQuery("united") searchRequest := NewSearchRequestOptions(query, int(10), 0, true) - _, err = idx.Search(searchRequest) + res, err := idx.Search(searchRequest) if err != 
nil { t.Error(err) } stats, _ := idx.StatsMap()["index"].(map[string]interface{}) prevBytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) - if prevBytesRead != 28618 { - t.Fatalf("expected bytes read for query string 28618, got %v", + if prevBytesRead != 32349 && res.BytesRead == prevBytesRead { + t.Fatalf("expected bytes read for query string 32349, got %v", prevBytesRead) } // subsequent queries on the same field results in lesser amount // of bytes read because the segment static and dictionary is reused and not // loaded from mmap'd filed - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 23 { + if bytesRead-prevBytesRead != 23 && res.BytesRead == bytesRead-prevBytesRead { t.Fatalf("expected bytes read for query string 23, got %v", bytesRead-prevBytesRead) } @@ -425,14 +425,14 @@ func TestBytesRead(t *testing.T) { fuzz.FieldVal = "reviews.content" fuzz.Fuzziness = 2 searchRequest = NewSearchRequest(fuzz) - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 16556 { - t.Fatalf("expected bytes read for fuzzy query is 16556, got %v\n", + if bytesRead-prevBytesRead != 206545 && res.BytesRead == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for fuzzy query is 206545, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead @@ -441,64 +441,65 @@ func TestBytesRead(t *testing.T) { query = NewQueryStringQuery("united") searchRequest = NewSearchRequestOptions(query, int(0), 0, true) searchRequest.AddFacet("types", typeFacet) - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } 
stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if !approxSame(bytesRead-prevBytesRead, 259) { - t.Fatalf("expected bytes read for faceted query is 259, got %v", + if !approxSame(bytesRead-prevBytesRead, 150) && res.BytesRead == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for faceted query is around 150, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead - min := float64(8000) - max := float64(8010) + min := float64(8660) + max := float64(8665) numRangeQuery := NewNumericRangeQuery(&min, &max) numRangeQuery.FieldVal = "id" searchRequest = NewSearchRequestOptions(numRangeQuery, int(10), 0, true) - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 1678 { - t.Fatalf("expected bytes read for numeric range query is 1678, got %v", + if bytesRead-prevBytesRead != 54945 && res.BytesRead == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for numeric range query is 54945, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead searchRequest = NewSearchRequestOptions(query, int(10), 0, true) searchRequest.Highlight = &HighlightRequest{} - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 676 { - t.Fatalf("expected bytes read for query with highlighter is 676, got %v", + if bytesRead-prevBytesRead != 60 && res.BytesRead == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for query with highlighter is 60, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead - disQuery := NewDisjunctionQuery(NewMatchQuery("united"), 
NewMatchQuery("hotel")) + disQuery := NewDisjunctionQuery(NewMatchQuery("hotel"), NewMatchQuery("united")) searchRequest = NewSearchRequestOptions(disQuery, int(10), 0, true) - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } // expectation is that the bytes read is roughly equal to sum of sub queries in - // the disjunction query plus sum static value + // the disjunction query plus the segment loading portion for the second subquery + // since it's created afresh and not reused stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 77 { - t.Fatalf("expected bytes read for disjunction query is 77, got %v", + if bytesRead-prevBytesRead != 18090 && res.BytesRead == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for disjunction query is 18090, got %v", bytesRead-prevBytesRead) } } @@ -572,33 +573,33 @@ func TestBytesReadStored(t *testing.T) { query := NewTermQuery("hotel") query.FieldVal = "reviews.content" searchRequest := NewSearchRequestOptions(query, int(10), 0, true) - _, err = idx.Search(searchRequest) + res, err := idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ := idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead != 15792 { - t.Fatalf("expected the bytes read stat to be around 15792, got %v", bytesRead) + if bytesRead != 25928 && bytesRead == res.BytesRead { + t.Fatalf("expected the bytes read stat to be around 25928, got %v", bytesRead) } prevBytesRead := bytesRead searchRequest = NewSearchRequestOptions(query, int(10), 0, true) - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 15 { + if 
bytesRead-prevBytesRead != 15 && bytesRead-prevBytesRead == res.BytesRead { t.Fatalf("expected the bytes read stat to be around 15, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead searchRequest = NewSearchRequestOptions(query, int(10), 0, true) searchRequest.Fields = []string{"*"} - _, err = idx.Search(searchRequest) + res, err = idx.Search(searchRequest) if err != nil { t.Error(err) } @@ -606,8 +607,9 @@ func TestBytesReadStored(t *testing.T) { stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 38278 { - t.Fatalf("expected the bytes read stat to be around 38278, got %v", err) + if bytesRead-prevBytesRead != 26478 && bytesRead-prevBytesRead == res.BytesRead { + t.Fatalf("expected the bytes read stat to be around 26478, got %v", + bytesRead-prevBytesRead) } idx.Close() cleanupTmpIndexPath(t, tmpIndexPath) @@ -642,40 +644,39 @@ func TestBytesReadStored(t *testing.T) { query = NewTermQuery("hotel") query.FieldVal = "type" searchRequest = NewSearchRequestOptions(query, int(10), 0, true) - _, err = idx1.Search(searchRequest) + res, err = idx1.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead != 167 { - t.Fatalf("expected the bytes read stat to be around 167, got %v", bytesRead-prevBytesRead) + if bytesRead != 18114 && bytesRead == res.BytesRead { + t.Fatalf("expected the bytes read stat to be around 18114, got %v", bytesRead) } prevBytesRead = bytesRead - _, err = idx1.Search(searchRequest) + res, err = idx1.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 12 { + if bytesRead-prevBytesRead != 12 && bytesRead-prevBytesRead == res.BytesRead { t.Fatalf("expected 
the bytes read stat to be around 12, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead searchRequest.Fields = []string{"*"} - _, err = idx1.Search(searchRequest) + res, err = idx1.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - - if bytesRead-prevBytesRead != 646 { - t.Fatalf("expected the bytes read stat to be around 646, got %v", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 42 && bytesRead-prevBytesRead == res.BytesRead { + t.Fatalf("expected the bytes read stat to be around 42, got %v", bytesRead-prevBytesRead) } } @@ -816,9 +817,9 @@ type slowQuery struct { delay time.Duration } -func (s *slowQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (s *slowQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { time.Sleep(s.delay) - return s.actual.Searcher(i, m, options) + return s.actual.Searcher(ctx, i, m, options) } func TestSlowSearch(t *testing.T) { diff --git a/search.go b/search.go index 2276aa011..2178d1b85 100644 --- a/search.go +++ b/search.go @@ -486,13 +486,14 @@ func (ss *SearchStatus) Merge(other *SearchStatus) { // A SearchResult describes the results of executing // a SearchRequest. 
type SearchResult struct { - Status *SearchStatus `json:"status"` - Request *SearchRequest `json:"request"` - Hits search.DocumentMatchCollection `json:"hits"` - Total uint64 `json:"total_hits"` - MaxScore float64 `json:"max_score"` - Took time.Duration `json:"took"` - Facets search.FacetResults `json:"facets"` + Status *SearchStatus `json:"status"` + Request *SearchRequest `json:"request"` + Hits search.DocumentMatchCollection `json:"hits"` + Total uint64 `json:"total_hits"` + BytesRead uint64 `json:"bytesRead,omitempty"` + MaxScore float64 `json:"max_score"` + Took time.Duration `json:"took"` + Facets search.FacetResults `json:"facets"` } func (sr *SearchResult) Size() int { @@ -559,6 +560,7 @@ func (sr *SearchResult) Merge(other *SearchResult) { sr.Status.Merge(other.Status) sr.Hits = append(sr.Hits, other.Hits...) sr.Total += other.Total + sr.BytesRead += other.BytesRead if other.MaxScore > sr.MaxScore { sr.MaxScore = other.MaxScore } diff --git a/search/collector/search_test.go b/search/collector/search_test.go index 0d7cd09b0..1f6f88213 100644 --- a/search/collector/search_test.go +++ b/search/collector/search_test.go @@ -15,6 +15,7 @@ package collector import ( + "context" "reflect" "github.com/blevesearch/bleve/v2/search" @@ -26,6 +27,14 @@ type stubSearcher struct { matches []*search.DocumentMatch } +func (ss *stubSearcher) SetBytesRead(val uint64) { + +} + +func (ss *stubSearcher) BytesRead() uint64 { + return 0 +} + func (ss *stubSearcher) Size() int { sizeInBytes := int(reflect.TypeOf(*ss).Size()) @@ -93,7 +102,7 @@ func (sr *stubReader) Size() int { return 0 } -func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { +func (sr *stubReader) TermFieldReader(ctx context.Context, term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { return nil, nil } @@ -173,3 +182,6 @@ type DocValueReader struct { func (dvr 
*DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocValueVisitor) error { return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor) } +func (dvr *DocValueReader) BytesRead() uint64 { + return 0 +} diff --git a/search/collector/topn.go b/search/collector/topn.go index 7c9db9ff0..4d19cd455 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -49,15 +49,12 @@ type collectorCompare func(i, j *search.DocumentMatch) int type collectorFixup func(d *search.DocumentMatch) error -const SearchIOStatsCallbackKey = "_search_io_stats_callback_key" - -type SearchIOStatsCallbackFunc func(uint64) - // TopNCollector collects the top N hits, optionally skipping some results type TopNCollector struct { size int skip int total uint64 + bytesRead uint64 maxScore float64 took time.Duration sort search.SortOrder @@ -201,7 +198,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, } hc.needDocIds = hc.needDocIds || loadID - var totalBytesRead uint64 select { case <-ctx.Done(): return ctx.Err() @@ -209,7 +205,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, next, err = searcher.Next(searchContext) } for err == nil && next != nil { - totalBytesRead += next.BytesRead if hc.total%CheckDoneEvery == 0 { select { case <-ctx.Done(): @@ -231,9 +226,12 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, next, err = searcher.Next(searchContext) } - statsCallbackFn := ctx.Value(SearchIOStatsCallbackKey) + statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey) if statsCallbackFn != nil { - statsCallbackFn.(SearchIOStatsCallbackFunc)(totalBytesRead) + // hc.bytesRead corresponds to the + // total bytes read as part of docValues being read every hit + // which must be accounted by invoking the callback. 
+ statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead) } // help finalize/flush the results in case @@ -361,6 +359,8 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc hc.facetsBuilder.EndDoc() } + hc.bytesRead += hc.dvReader.BytesRead() + return err } diff --git a/search/query/bool_field.go b/search/query/bool_field.go index 0272a2feb..5aa7bb8af 100644 --- a/search/query/bool_field.go +++ b/search/query/bool_field.go @@ -15,6 +15,8 @@ package query import ( + "context" + "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search/searcher" @@ -51,7 +53,7 @@ func (q *BoolFieldQuery) Field() string { return q.FieldVal } -func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *BoolFieldQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -60,5 +62,5 @@ func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, o if q.Bool { term = "T" } - return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), options) + return searcher.NewTermSearcher(ctx, i, term, field, q.BoostVal.Value(), options) } diff --git a/search/query/boolean.go b/search/query/boolean.go index b9c504f85..b5e1fdc40 100644 --- a/search/query/boolean.go +++ b/search/query/boolean.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -113,11 +114,11 @@ func (q *BooleanQuery) Boost() float64 { return q.BoostVal.Value() } -func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) 
(search.Searcher, error) { var err error var mustNotSearcher search.Searcher if q.MustNot != nil { - mustNotSearcher, err = q.MustNot.Searcher(i, m, options) + mustNotSearcher, err = q.MustNot.Searcher(ctx, i, m, options) if err != nil { return nil, err } @@ -129,7 +130,7 @@ func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opt var mustSearcher search.Searcher if q.Must != nil { - mustSearcher, err = q.Must.Searcher(i, m, options) + mustSearcher, err = q.Must.Searcher(ctx, i, m, options) if err != nil { return nil, err } @@ -141,7 +142,7 @@ func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opt var shouldSearcher search.Searcher if q.Should != nil { - shouldSearcher, err = q.Should.Searcher(i, m, options) + shouldSearcher, err = q.Should.Searcher(ctx, i, m, options) if err != nil { return nil, err } @@ -158,7 +159,7 @@ func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opt // if only mustNotSearcher, start with MatchAll if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil { - mustSearcher, err = searcher.NewMatchAllSearcher(i, 1.0, options) + mustSearcher, err = searcher.NewMatchAllSearcher(ctx, i, 1.0, options) if err != nil { return nil, err } @@ -169,7 +170,7 @@ func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opt return shouldSearcher, nil } - return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, options) + return searcher.NewBooleanSearcher(ctx, i, mustSearcher, shouldSearcher, mustNotSearcher, options) } func (q *BooleanQuery) Validate() error { diff --git a/search/query/conjunction.go b/search/query/conjunction.go index 7d647646e..27bec7d61 100644 --- a/search/query/conjunction.go +++ b/search/query/conjunction.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "github.com/blevesearch/bleve/v2/mapping" @@ -52,10 +53,10 @@ func (q *ConjunctionQuery) AddQuery(aq ...Query) { } } -func 
(q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *ConjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { ss := make([]search.Searcher, 0, len(q.Conjuncts)) for _, conjunct := range q.Conjuncts { - sr, err := conjunct.Searcher(i, m, options) + sr, err := conjunct.Searcher(ctx, i, m, options) if err != nil { for _, searcher := range ss { if searcher != nil { @@ -75,7 +76,7 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, return searcher.NewMatchNoneSearcher(i) } - return searcher.NewConjunctionSearcher(i, ss, options) + return searcher.NewConjunctionSearcher(ctx, i, ss, options) } func (q *ConjunctionQuery) Validate() error { diff --git a/search/query/date_range.go b/search/query/date_range.go index 290786ddb..ef18f2fb8 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" "math" @@ -133,7 +134,7 @@ func (q *DateRangeQuery) Field() string { return q.FieldVal } -func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *DateRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { min, max, err := q.parseEndpoints() if err != nil { return nil, err @@ -144,7 +145,7 @@ func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, o field = m.DefaultSearchField() } - return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options) + return searcher.NewNumericRangeSearcher(ctx, i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options) } func (q *DateRangeQuery) parseEndpoints() 
(*float64, *float64, error) { diff --git a/search/query/disjunction.go b/search/query/disjunction.go index 50957fa67..c6cc0d737 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -58,11 +59,11 @@ func (q *DisjunctionQuery) SetMin(m float64) { q.Min = m } -func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, +func (q *DisjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { ss := make([]search.Searcher, 0, len(q.Disjuncts)) for _, disjunct := range q.Disjuncts { - sr, err := disjunct.Searcher(i, m, options) + sr, err := disjunct.Searcher(ctx, i, m, options) if err != nil { for _, searcher := range ss { if searcher != nil { @@ -82,7 +83,7 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, return searcher.NewMatchNoneSearcher(i) } - return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) + return searcher.NewDisjunctionSearcher(ctx, i, ss, q.Min, options) } func (q *DisjunctionQuery) Validate() error { diff --git a/search/query/docid.go b/search/query/docid.go index 1d273394d..7116f3913 100644 --- a/search/query/docid.go +++ b/search/query/docid.go @@ -15,6 +15,8 @@ package query import ( + "context" + "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search/searcher" @@ -44,6 +46,6 @@ func (q *DocIDQuery) Boost() float64 { return q.BoostVal.Value() } -func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { - return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), options) +func (q *DocIDQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + return 
searcher.NewDocIDSearcher(ctx, i, q.IDs, q.BoostVal.Value(), options) } diff --git a/search/query/fuzzy.go b/search/query/fuzzy.go index aceaa802d..f24eb0c20 100644 --- a/search/query/fuzzy.go +++ b/search/query/fuzzy.go @@ -15,6 +15,8 @@ package query import ( + "context" + "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search/searcher" @@ -68,10 +70,10 @@ func (q *FuzzyQuery) SetPrefix(p int) { q.Prefix = p } -func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *FuzzyQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() } - return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), options) + return searcher.NewFuzzySearcher(ctx, i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), options) } diff --git a/search/query/geo_boundingbox.go b/search/query/geo_boundingbox.go index be4b5a8b9..ac9125393 100644 --- a/search/query/geo_boundingbox.go +++ b/search/query/geo_boundingbox.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -56,7 +57,7 @@ func (q *GeoBoundingBoxQuery) Field() string { return q.FieldVal } -func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -65,20 +66,20 @@ func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMappi if q.BottomRight[0] < q.TopLeft[0] { // cross date line, rewrite as two parts - leftSearcher, err := 
searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true) + leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(ctx, i, -180, q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true) if err != nil { return nil, err } - rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], 180, q.TopLeft[1], field, q.BoostVal.Value(), options, true) + rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(ctx, i, q.TopLeft[0], q.BottomRight[1], 180, q.TopLeft[1], field, q.BoostVal.Value(), options, true) if err != nil { _ = leftSearcher.Close() return nil, err } - return searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options) + return searcher.NewDisjunctionSearcher(ctx, i, []search.Searcher{leftSearcher, rightSearcher}, 0, options) } - return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true) + return searcher.NewGeoBoundingBoxSearcher(ctx, i, q.TopLeft[0], q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true) } func (q *GeoBoundingBoxQuery) Validate() error { diff --git a/search/query/geo_boundingpolygon.go b/search/query/geo_boundingpolygon.go index abb8ccd7c..467f39b28 100644 --- a/search/query/geo_boundingpolygon.go +++ b/search/query/geo_boundingpolygon.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -53,14 +54,14 @@ func (q *GeoBoundingPolygonQuery) Field() string { return q.FieldVal } -func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader, +func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() } - return 
searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options) + return searcher.NewGeoBoundedPolygonSearcher(ctx, i, q.Points, field, q.BoostVal.Value(), options) } func (q *GeoBoundingPolygonQuery) Validate() error { diff --git a/search/query/geo_distance.go b/search/query/geo_distance.go index d5174c227..f05bf6723 100644 --- a/search/query/geo_distance.go +++ b/search/query/geo_distance.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -56,7 +57,7 @@ func (q *GeoDistanceQuery) Field() string { return q.FieldVal } -func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, +func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { @@ -68,7 +69,7 @@ func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, return nil, err } - return searcher.NewGeoPointDistanceSearcher(i, q.Location[0], q.Location[1], + return searcher.NewGeoPointDistanceSearcher(ctx, i, q.Location[0], q.Location[1], dist, field, q.BoostVal.Value(), options) } diff --git a/search/query/geo_shape.go b/search/query/geo_shape.go index aff5d2f9d..a63ec80f7 100644 --- a/search/query/geo_shape.go +++ b/search/query/geo_shape.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "github.com/blevesearch/bleve/v2/geo" @@ -99,14 +100,14 @@ func (q *GeoShapeQuery) Field() string { return q.FieldVal } -func (q *GeoShapeQuery) Searcher(i index.IndexReader, +func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() } - return searcher.NewGeoShapeSearcher(i, q.Geometry.Shape, q.Geometry.Relation, field, + return searcher.NewGeoShapeSearcher(ctx, i, q.Geometry.Shape, 
q.Geometry.Relation, field, q.BoostVal.Value(), options) } diff --git a/search/query/ip_range.go b/search/query/ip_range.go index b02b5ed2c..68577cc9a 100644 --- a/search/query/ip_range.go +++ b/search/query/ip_range.go @@ -15,6 +15,7 @@ package query import ( + "context" "fmt" "net" @@ -53,7 +54,7 @@ func (q *IPRangeQuery) Field() string { return q.FieldVal } -func (q *IPRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *IPRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -65,9 +66,9 @@ func (q *IPRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opt return nil, err } // If we are searching for a specific ip rather than members of a network, just use a term search. - return searcher.NewTermSearcherBytes(i, ip.To16(), field, q.BoostVal.Value(), options) + return searcher.NewTermSearcherBytes(ctx, i, ip.To16(), field, q.BoostVal.Value(), options) } - return searcher.NewIPRangeSearcher(i, ipNet, field, q.BoostVal.Value(), options) + return searcher.NewIPRangeSearcher(ctx, i, ipNet, field, q.BoostVal.Value(), options) } func (q *IPRangeQuery) Validate() error { diff --git a/search/query/match.go b/search/query/match.go index da1dc091f..61c00a003 100644 --- a/search/query/match.go +++ b/search/query/match.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -114,7 +115,7 @@ func (q *MatchQuery) SetOperator(operator MatchQueryOperator) { q.Operator = operator } -func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *MatchQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if 
q.FieldVal == "" { @@ -160,17 +161,17 @@ func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, optio shouldQuery := NewDisjunctionQuery(tqs) shouldQuery.SetMin(1) shouldQuery.SetBoost(q.BoostVal.Value()) - return shouldQuery.Searcher(i, m, options) + return shouldQuery.Searcher(ctx, i, m, options) case MatchQueryOperatorAnd: mustQuery := NewConjunctionQuery(tqs) mustQuery.SetBoost(q.BoostVal.Value()) - return mustQuery.Searcher(i, m, options) + return mustQuery.Searcher(ctx, i, m, options) default: return nil, fmt.Errorf("unhandled operator %d", q.Operator) } } noneQuery := NewMatchNoneQuery() - return noneQuery.Searcher(i, m, options) + return noneQuery.Searcher(ctx, i, m, options) } diff --git a/search/query/match_all.go b/search/query/match_all.go index a31f25abc..e88825ae4 100644 --- a/search/query/match_all.go +++ b/search/query/match_all.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "github.com/blevesearch/bleve/v2/mapping" @@ -42,8 +43,8 @@ func (q *MatchAllQuery) Boost() float64 { return q.BoostVal.Value() } -func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { - return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), options) +func (q *MatchAllQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + return searcher.NewMatchAllSearcher(ctx, i, q.BoostVal.Value(), options) } func (q *MatchAllQuery) MarshalJSON() ([]byte, error) { diff --git a/search/query/match_none.go b/search/query/match_none.go index 69b44186c..cb65a725f 100644 --- a/search/query/match_none.go +++ b/search/query/match_none.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "github.com/blevesearch/bleve/v2/mapping" @@ -42,7 +43,7 @@ func (q *MatchNoneQuery) Boost() float64 { return q.BoostVal.Value() } -func (q *MatchNoneQuery) Searcher(i 
index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *MatchNoneQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { return searcher.NewMatchNoneSearcher(i) } diff --git a/search/query/match_phrase.go b/search/query/match_phrase.go index 057245fbc..fa8ac720b 100644 --- a/search/query/match_phrase.go +++ b/search/query/match_phrase.go @@ -15,6 +15,7 @@ package query import ( + "context" "fmt" "github.com/blevesearch/bleve/v2/analysis" @@ -61,7 +62,7 @@ func (q *MatchPhraseQuery) Field() string { return q.FieldVal } -func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *MatchPhraseQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -83,10 +84,10 @@ func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, phrase := tokenStreamToPhrase(tokens) phraseQuery := NewMultiPhraseQuery(phrase, field) phraseQuery.SetBoost(q.BoostVal.Value()) - return phraseQuery.Searcher(i, m, options) + return phraseQuery.Searcher(ctx, i, m, options) } noneQuery := NewMatchNoneQuery() - return noneQuery.Searcher(i, m, options) + return noneQuery.Searcher(ctx, i, m, options) } func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string { diff --git a/search/query/multi_phrase.go b/search/query/multi_phrase.go index d75dc0c89..2887be16a 100644 --- a/search/query/multi_phrase.go +++ b/search/query/multi_phrase.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -55,8 +56,8 @@ func (q *MultiPhraseQuery) Boost() float64 { return q.BoostVal.Value() } -func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options 
search.SearcherOptions) (search.Searcher, error) { - return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options) +func (q *MultiPhraseQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + return searcher.NewMultiPhraseSearcher(ctx, i, q.Terms, q.Field, options) } func (q *MultiPhraseQuery) Validate() error { diff --git a/search/query/numeric_range.go b/search/query/numeric_range.go index a1fe7b6c1..ad2474167 100644 --- a/search/query/numeric_range.go +++ b/search/query/numeric_range.go @@ -15,6 +15,7 @@ package query import ( + "context" "fmt" "github.com/blevesearch/bleve/v2/mapping" @@ -71,12 +72,12 @@ func (q *NumericRangeQuery) Field() string { return q.FieldVal } -func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() } - return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) + return searcher.NewNumericRangeSearcher(ctx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) } func (q *NumericRangeQuery) Validate() error { diff --git a/search/query/phrase.go b/search/query/phrase.go index d6da11853..207e66b17 100644 --- a/search/query/phrase.go +++ b/search/query/phrase.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" @@ -52,8 +53,8 @@ func (q *PhraseQuery) Boost() float64 { return q.BoostVal.Value() } -func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { - return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options) +func (q 
*PhraseQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + return searcher.NewPhraseSearcher(ctx, i, q.Terms, q.Field, options) } func (q *PhraseQuery) Validate() error { diff --git a/search/query/prefix.go b/search/query/prefix.go index 05dc40c04..debbbc1e3 100644 --- a/search/query/prefix.go +++ b/search/query/prefix.go @@ -15,6 +15,8 @@ package query import ( + "context" + "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search/searcher" @@ -53,10 +55,10 @@ func (q *PrefixQuery) Field() string { return q.FieldVal } -func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *PrefixQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() } - return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), options) + return searcher.NewTermPrefixSearcher(ctx, i, q.Prefix, field, q.BoostVal.Value(), options) } diff --git a/search/query/query.go b/search/query/query.go index ee876c8e0..df560534b 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -15,6 +15,7 @@ package query import ( + "context" "encoding/json" "fmt" "io/ioutil" @@ -36,7 +37,7 @@ func SetLog(l *log.Logger) { // A Query represents a description of the type // and parameters for a query into the index. 
type Query interface { - Searcher(i index.IndexReader, m mapping.IndexMapping, + Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) } diff --git a/search/query/query_string.go b/search/query/query_string.go index fe1680c52..42bb598bb 100644 --- a/search/query/query_string.go +++ b/search/query/query_string.go @@ -15,6 +15,8 @@ package query import ( + "context" + "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" @@ -47,12 +49,12 @@ func (q *QueryStringQuery) Parse() (Query, error) { return parseQuerySyntax(q.Query) } -func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *QueryStringQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { newQuery, err := parseQuerySyntax(q.Query) if err != nil { return nil, err } - return newQuery.Searcher(i, m, options) + return newQuery.Searcher(ctx, i, m, options) } func (q *QueryStringQuery) Validate() error { diff --git a/search/query/regexp.go b/search/query/regexp.go index ba744ec15..6b3da9554 100644 --- a/search/query/regexp.go +++ b/search/query/regexp.go @@ -15,6 +15,7 @@ package query import ( + "context" "strings" "github.com/blevesearch/bleve/v2/mapping" @@ -57,7 +58,7 @@ func (q *RegexpQuery) Field() string { return q.FieldVal } -func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *RegexpQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -72,7 +73,7 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, 
opti actualRegexp = actualRegexp[1:] // remove leading ^ } - return searcher.NewRegexpStringSearcher(i, actualRegexp, field, + return searcher.NewRegexpStringSearcher(ctx, i, actualRegexp, field, q.BoostVal.Value(), options) } diff --git a/search/query/term.go b/search/query/term.go index 82958bb02..5c6af3962 100644 --- a/search/query/term.go +++ b/search/query/term.go @@ -15,6 +15,8 @@ package query import ( + "context" + "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search/searcher" @@ -52,10 +54,10 @@ func (q *TermQuery) Field() string { return q.FieldVal } -func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *TermQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() } - return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), options) + return searcher.NewTermSearcher(ctx, i, q.Term, field, q.BoostVal.Value(), options) } diff --git a/search/query/term_range.go b/search/query/term_range.go index 3edfa6954..4dc3a34b7 100644 --- a/search/query/term_range.go +++ b/search/query/term_range.go @@ -15,6 +15,7 @@ package query import ( + "context" "fmt" "github.com/blevesearch/bleve/v2/mapping" @@ -71,7 +72,7 @@ func (q *TermRangeQuery) Field() string { return q.FieldVal } -func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *TermRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -84,7 +85,7 @@ func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, o if q.Max 
!= "" { maxTerm = []byte(q.Max) } - return searcher.NewTermRangeSearcher(i, minTerm, maxTerm, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) + return searcher.NewTermRangeSearcher(ctx, i, minTerm, maxTerm, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) } func (q *TermRangeQuery) Validate() error { diff --git a/search/query/wildcard.go b/search/query/wildcard.go index 7713a9acb..f04f3f2ed 100644 --- a/search/query/wildcard.go +++ b/search/query/wildcard.go @@ -15,6 +15,7 @@ package query import ( + "context" "strings" "github.com/blevesearch/bleve/v2/mapping" @@ -76,7 +77,7 @@ func (q *WildcardQuery) Field() string { return q.FieldVal } -func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *WildcardQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { field := q.FieldVal if q.FieldVal == "" { field = m.DefaultSearchField() @@ -84,7 +85,7 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, op regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) - return searcher.NewRegexpStringSearcher(i, regexpString, field, + return searcher.NewRegexpStringSearcher(ctx, i, regexpString, field, q.BoostVal.Value(), options) } diff --git a/search/scorer/scorer_conjunction.go b/search/scorer/scorer_conjunction.go index f5dd8ca54..f3c81a78c 100644 --- a/search/scorer/scorer_conjunction.go +++ b/search/scorer/scorer_conjunction.go @@ -41,13 +41,7 @@ func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQuery options: options, } } -func getTotalBytesRead(matches []*search.DocumentMatch) uint64 { - var rv uint64 - for _, match := range matches { - rv += match.BytesRead - } - return rv -} + func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch { 
var sum float64 var childrenExplanations []*search.Explanation @@ -73,7 +67,6 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ rv.Expl = newExpl rv.FieldTermLocations = search.MergeFieldTermLocations( rv.FieldTermLocations, constituents[1:]) - rv.BytesRead = getTotalBytesRead(constituents) return rv } diff --git a/search/scorer/scorer_disjunction.go b/search/scorer/scorer_disjunction.go index fd9d0bb0f..054e76fd4 100644 --- a/search/scorer/scorer_disjunction.go +++ b/search/scorer/scorer_disjunction.go @@ -78,6 +78,6 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ rv.Expl = newExpl rv.FieldTermLocations = search.MergeFieldTermLocations( rv.FieldTermLocations, constituents[1:]) - rv.BytesRead = getTotalBytesRead(constituents) + return rv } diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index ce5f202d8..7b60eda4e 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -198,6 +198,5 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term }) } } - rv.BytesRead = termMatch.BytesRead return rv } diff --git a/search/search.go b/search/search.go index d45491b4a..69d8945f9 100644 --- a/search/search.go +++ b/search/search.go @@ -27,6 +27,10 @@ var reflectStaticSizeDocumentMatch int var reflectStaticSizeSearchContext int var reflectStaticSizeLocation int +const SearchIOStatsCallbackKey = "_search_io_stats_callback_key" + +type SearchIOStatsCallbackFunc func(uint64) + func init() { var dm DocumentMatch reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size()) @@ -166,8 +170,6 @@ type DocumentMatch struct { // be later incorporated into the Locations map when search // results are completed FieldTermLocations []FieldTermLocation `json:"-"` - - BytesRead uint64 `json:"-"` } func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { diff --git a/search/searcher/search_boolean.go 
b/search/searcher/search_boolean.go index ef9093c20..bf207f810 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "math" "reflect" @@ -48,7 +49,7 @@ type BooleanSearcher struct { done bool } -func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { +func NewBooleanSearcher(ctx context.Context, indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { // build our searcher rv := BooleanSearcher{ indexReader: indexReader, diff --git a/search/searcher/search_boolean_test.go b/search/searcher/search_boolean_test.go index d5f4875b8..f1a748f08 100644 --- a/search/searcher/search_boolean_test.go +++ b/search/searcher/search_boolean_test.go @@ -40,210 +40,210 @@ func TestBooleanSearch(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} // test 0 - beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher}, explainTrue) + mustSearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher}, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher, err := 
NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - shouldSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue) + shouldSearcher, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue) if err != nil { t.Fatal(err) } - steveTermSearcher, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustNotSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher}, 0, explainTrue) + mustNotSearcher, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{steveTermSearcher}, 0, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher, shouldSearcher, mustNotSearcher, explainTrue) + booleanSearcher, err := NewBooleanSearcher(nil, twoDocIndexReader, mustSearcher, shouldSearcher, mustNotSearcher, explainTrue) if err != nil { t.Fatal(err) } // test 1 - martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - shouldSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, explainTrue) + shouldSearcher2, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, explainTrue) if 
err != nil { t.Fatal(err) } - steveTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustNotSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher2}, 0, explainTrue) + mustNotSearcher2, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{steveTermSearcher2}, 0, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher2, err := NewBooleanSearcher(twoDocIndexReader, nil, shouldSearcher2, mustNotSearcher2, explainTrue) + booleanSearcher2, err := NewBooleanSearcher(nil, twoDocIndexReader, nil, shouldSearcher2, mustNotSearcher2, explainTrue) if err != nil { t.Fatal(err) } // test 2 - steveTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher3, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustNotSearcher3, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher3}, 0, explainTrue) + mustNotSearcher3, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{steveTermSearcher3}, 0, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher3, err := NewBooleanSearcher(twoDocIndexReader, nil, nil, mustNotSearcher3, explainTrue) + booleanSearcher3, err := NewBooleanSearcher(nil, twoDocIndexReader, nil, nil, mustNotSearcher3, explainTrue) if err != nil { t.Fatal(err) } // test 3 - beerTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher4, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustSearcher4, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher4}, explainTrue) + mustSearcher4, err := 
NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher4}, explainTrue) if err != nil { t.Fatal(err) } - steveTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher4, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustNotSearcher4, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher4}, 0, explainTrue) + mustNotSearcher4, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{steveTermSearcher4}, 0, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher4, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher4, nil, mustNotSearcher4, explainTrue) + booleanSearcher4, err := NewBooleanSearcher(nil, twoDocIndexReader, mustSearcher4, nil, mustNotSearcher4, explainTrue) if err != nil { t.Fatal(err) } // test 4 - beerTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher5, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustSearcher5, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher5}, explainTrue) + mustSearcher5, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher5}, explainTrue) if err != nil { t.Fatal(err) } - steveTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher5, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher5, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustNotSearcher5, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher5, 
martyTermSearcher5}, 0, explainTrue) + mustNotSearcher5, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{steveTermSearcher5, martyTermSearcher5}, 0, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher5, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher5, nil, mustNotSearcher5, explainTrue) + booleanSearcher5, err := NewBooleanSearcher(nil, twoDocIndexReader, mustSearcher5, nil, mustNotSearcher5, explainTrue) if err != nil { t.Fatal(err) } // test 5 - beerTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher6, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustSearcher6, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher6}, explainTrue) + mustSearcher6, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher6}, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher6, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher6, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - shouldSearcher6, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher6, dustinTermSearcher6}, 2, explainTrue) + shouldSearcher6, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher6, dustinTermSearcher6}, 2, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher6, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher6, shouldSearcher6, nil, explainTrue) + booleanSearcher6, err := NewBooleanSearcher(nil, twoDocIndexReader, mustSearcher6, 
shouldSearcher6, nil, explainTrue) if err != nil { t.Fatal(err) } // test 6 - beerTermSearcher7, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher7, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustSearcher7, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher7}, explainTrue) + mustSearcher7, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher7}, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher7, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher7, nil, nil, explainTrue) + booleanSearcher7, err := NewBooleanSearcher(nil, twoDocIndexReader, mustSearcher7, nil, nil, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher7, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 5.0, explainTrue) + martyTermSearcher7, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 5.0, explainTrue) if err != nil { t.Fatal(err) } - conjunctionSearcher7, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher7, booleanSearcher7}, explainTrue) + conjunctionSearcher7, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher7, booleanSearcher7}, explainTrue) // test 7 - beerTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher8, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustSearcher8, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher8}, explainTrue) + mustSearcher8, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher8}, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher8, err := 
NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher8, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - shouldSearcher8, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher8, dustinTermSearcher8}, 0, explainTrue) + shouldSearcher8, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher8, dustinTermSearcher8}, 0, explainTrue) if err != nil { t.Fatal(err) } - steveTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher8, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - mustNotSearcher8, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher8}, 0, explainTrue) + mustNotSearcher8, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{steveTermSearcher8}, 0, explainTrue) if err != nil { t.Fatal(err) } - booleanSearcher8, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher8, shouldSearcher8, mustNotSearcher8, explainTrue) + booleanSearcher8, err := NewBooleanSearcher(nil, twoDocIndexReader, mustSearcher8, shouldSearcher8, mustNotSearcher8, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher8a, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 5.0, explainTrue) + dustinTermSearcher8a, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 5.0, explainTrue) if err != nil { t.Fatal(err) } - conjunctionSearcher8, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{booleanSearcher8, dustinTermSearcher8a}, explainTrue) + conjunctionSearcher8, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{booleanSearcher8, dustinTermSearcher8a}, 
explainTrue) if err != nil { t.Fatal(err) } diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index 5fe59b900..19ef199ac 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "math" "reflect" "sort" @@ -41,9 +42,10 @@ type ConjunctionSearcher struct { scorer *scorer.ConjunctionQueryScorer initialized bool options search.SearcherOptions + bytesRead uint64 } -func NewConjunctionSearcher(indexReader index.IndexReader, +func NewConjunctionSearcher(ctx context.Context, indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) ( search.Searcher, error) { // build the sorted downstream searchers @@ -57,7 +59,7 @@ func NewConjunctionSearcher(indexReader index.IndexReader, // do not need extra information like freq-norm's or term vectors if len(searchers) > 1 && options.Score == "none" && !options.IncludeTermVectors { - rv, err := optimizeCompositeSearcher("conjunction:unadorned", + rv, err := optimizeCompositeSearcher(ctx, "conjunction:unadorned", indexReader, searchers, options) if err != nil || rv != nil { return rv, err @@ -76,7 +78,7 @@ func NewConjunctionSearcher(indexReader index.IndexReader, // attempt push-down conjunction optimization when there's >1 searchers if len(searchers) > 1 { - rv, err := optimizeCompositeSearcher("conjunction", + rv, err := optimizeCompositeSearcher(ctx, "conjunction", indexReader, searchers, options) if err != nil || rv != nil { return rv, err diff --git a/search/searcher/search_conjunction_test.go b/search/searcher/search_conjunction_test.go index 747b3a016..d09c4efa2 100644 --- a/search/searcher/search_conjunction_test.go +++ b/search/searcher/search_conjunction_test.go @@ -40,93 +40,93 @@ func TestConjunctionSearch(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} // test 0 - beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", 
"desc", 1.0, explainTrue) + beerTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 5.0, explainTrue) + martyTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 5.0, explainTrue) if err != nil { t.Fatal(err) } - beerAndMartySearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher, martyTermSearcher}, explainTrue) + beerAndMartySearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher, martyTermSearcher}, explainTrue) if err != nil { t.Fatal(err) } // test 1 - angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, explainTrue) + angstTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "angst", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - beerTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - angstAndBeerSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher2}, explainTrue) + angstAndBeerSearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher2}, explainTrue) if err != nil { t.Fatal(err) } // test 2 - beerTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher3, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - jackTermSearcher, err := NewTermSearcher(twoDocIndexReader, "jack", "name", 5.0, explainTrue) + jackTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "jack", "name", 5.0, explainTrue) if err != nil { t.Fatal(err) } - beerAndJackSearcher, err 
:= NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher3, jackTermSearcher}, explainTrue) + beerAndJackSearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher3, jackTermSearcher}, explainTrue) if err != nil { t.Fatal(err) } // test 3 - beerTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, explainTrue) + beerTermSearcher4, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - misterTermSearcher, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, explainTrue) + misterTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "mister", "title", 5.0, explainTrue) if err != nil { t.Fatal(err) } - beerAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher4, misterTermSearcher}, explainTrue) + beerAndMisterSearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher4, misterTermSearcher}, explainTrue) if err != nil { t.Fatal(err) } // test 4 - couchbaseTermSearcher, err := NewTermSearcher(twoDocIndexReader, "couchbase", "street", 1.0, explainTrue) + couchbaseTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "couchbase", "street", 1.0, explainTrue) if err != nil { t.Fatal(err) } - misterTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, explainTrue) + misterTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "mister", "title", 5.0, explainTrue) if err != nil { t.Fatal(err) } - couchbaseAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{couchbaseTermSearcher, misterTermSearcher2}, explainTrue) + couchbaseAndMisterSearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{couchbaseTermSearcher, misterTermSearcher2}, explainTrue) if err != nil { t.Fatal(err) } // test 5 - beerTermSearcher5, err := 
NewTermSearcher(twoDocIndexReader, "beer", "desc", 5.0, explainTrue) + beerTermSearcher5, err := NewTermSearcher(nil, twoDocIndexReader, "beer", "desc", 5.0, explainTrue) if err != nil { t.Fatal(err) } - couchbaseTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "couchbase", "street", 1.0, explainTrue) + couchbaseTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "couchbase", "street", 1.0, explainTrue) if err != nil { t.Fatal(err) } - misterTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, explainTrue) + misterTermSearcher3, err := NewTermSearcher(nil, twoDocIndexReader, "mister", "title", 5.0, explainTrue) if err != nil { t.Fatal(err) } - couchbaseAndMisterSearcher2, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{couchbaseTermSearcher2, misterTermSearcher3}, explainTrue) + couchbaseAndMisterSearcher2, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{couchbaseTermSearcher2, misterTermSearcher3}, explainTrue) if err != nil { t.Fatal(err) } - beerAndCouchbaseAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher5, couchbaseAndMisterSearcher2}, explainTrue) + beerAndCouchbaseAndMisterSearcher, err := NewConjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{beerTermSearcher5, couchbaseAndMisterSearcher2}, explainTrue) if err != nil { t.Fatal(err) } @@ -353,7 +353,7 @@ func testScorchCompositeSearchOptimizationsHelper( field := ft[0] term := ft[1] - searcher, err := NewTermSearcher(indexReader, term, field, 1.0, searcherOptions) + searcher, err := NewTermSearcher(nil, indexReader, term, field, 1.0, searcherOptions) if err != nil { t.Fatal(err) } @@ -365,9 +365,9 @@ func testScorchCompositeSearchOptimizationsHelper( var cs search.Searcher var err error if compositeKind == "conjunction" { - cs, err = NewConjunctionSearcher(indexReader, searchers, searcherOptions) + cs, err = NewConjunctionSearcher(nil, indexReader, searchers, 
searcherOptions) } else { - cs, err = NewDisjunctionSearcher(indexReader, searchers, 0, searcherOptions) + cs, err = NewDisjunctionSearcher(nil, indexReader, searchers, 0, searcherOptions) } if err != nil { t.Fatal(err) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index a2f1cf2ab..606a157ae 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "fmt" "github.com/blevesearch/bleve/v2/search" @@ -31,10 +32,10 @@ var DisjunctionMaxClauseCount = 0 // slice implementation to a heap implementation. var DisjunctionHeapTakeover = 10 -func NewDisjunctionSearcher(indexReader index.IndexReader, +func NewDisjunctionSearcher(ctx context.Context, indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( search.Searcher, error) { - return newDisjunctionSearcher(indexReader, qsearchers, min, options, true) + return newDisjunctionSearcher(ctx, indexReader, qsearchers, min, options, true) } func optionsDisjunctionOptimizable(options search.SearcherOptions) bool { @@ -42,7 +43,7 @@ func optionsDisjunctionOptimizable(options search.SearcherOptions) bool { return rv } -func newDisjunctionSearcher(indexReader index.IndexReader, +func newDisjunctionSearcher(ctx context.Context, indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) (search.Searcher, error) { // attempt the "unadorned" disjunction optimization only when we @@ -50,7 +51,7 @@ func newDisjunctionSearcher(indexReader index.IndexReader, // and the requested min is simple if len(qsearchers) > 1 && min <= 1 && optionsDisjunctionOptimizable(options) { - rv, err := optimizeCompositeSearcher("disjunction:unadorned", + rv, err := optimizeCompositeSearcher(ctx, "disjunction:unadorned", indexReader, qsearchers, options) if err != nil || rv != nil { return rv, err @@ -58,14 +59,14 @@ 
func newDisjunctionSearcher(indexReader index.IndexReader, } if len(qsearchers) > DisjunctionHeapTakeover { - return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options, + return newDisjunctionHeapSearcher(ctx, indexReader, qsearchers, min, options, limit) } - return newDisjunctionSliceSearcher(indexReader, qsearchers, min, options, + return newDisjunctionSliceSearcher(ctx, indexReader, qsearchers, min, options, limit) } -func optimizeCompositeSearcher(optimizationKind string, +func optimizeCompositeSearcher(ctx context.Context, optimizationKind string, indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (search.Searcher, error) { var octx index.OptimizableContext diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index bf945976d..023583832 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -17,6 +17,7 @@ package searcher import ( "bytes" "container/heap" + "context" "math" "reflect" @@ -55,9 +56,11 @@ type DisjunctionHeapSearcher struct { matching []*search.DocumentMatch matchingCurrs []*SearcherCurr + + bytesRead uint64 } -func newDisjunctionHeapSearcher(indexReader index.IndexReader, +func newDisjunctionHeapSearcher(ctx context.Context, indexReader index.IndexReader, searchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) ( *DisjunctionHeapSearcher, error) { diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go index 63ee7ef2e..6958cf492 100644 --- a/search/searcher/search_disjunction_slice.go +++ b/search/searcher/search_disjunction_slice.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "math" "reflect" "sort" @@ -43,9 +44,10 @@ type DisjunctionSliceSearcher struct { matching []*search.DocumentMatch matchingIdxs []int initialized bool + bytesRead uint64 } -func newDisjunctionSliceSearcher(indexReader 
index.IndexReader, +func newDisjunctionSliceSearcher(ctx context.Context, indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) ( *DisjunctionSliceSearcher, error) { diff --git a/search/searcher/search_disjunction_test.go b/search/searcher/search_disjunction_test.go index b4bf31673..ed69bc044 100644 --- a/search/searcher/search_disjunction_test.go +++ b/search/searcher/search_disjunction_test.go @@ -36,37 +36,37 @@ func TestDisjunctionSearch(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} - martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue) + martyOrDustinSearcher, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue) if err != nil { t.Fatal(err) } - martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher2, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - martyOrDustinSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, 
explainTrue) + martyOrDustinSearcher2, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, explainTrue) if err != nil { t.Fatal(err) } - raviTermSearcher, err := NewTermSearcher(twoDocIndexReader, "ravi", "name", 1.0, explainTrue) + raviTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "ravi", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, explainTrue) + nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, explainTrue) if err != nil { t.Fatal(err) } @@ -159,15 +159,15 @@ func TestDisjunctionAdvance(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} - martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue) + martyOrDustinSearcher, err := NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue) if err != nil { t.Fatal(err) } @@ -206,19 +206,19 @@ func TestDisjunctionSearchTooMany(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} - martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, explainTrue) + martyTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, 
"marty", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, explainTrue) + dustinTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "dustin", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - steveTermSearcher, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, explainTrue) + steveTermSearcher, err := NewTermSearcher(nil, twoDocIndexReader, "steve", "name", 1.0, explainTrue) if err != nil { t.Fatal(err) } - _, err = NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher, steveTermSearcher}, 0, explainTrue) + _, err = NewDisjunctionSearcher(nil, twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher, steveTermSearcher}, 0, explainTrue) if err == nil { t.Fatal(err) } diff --git a/search/searcher/search_docid.go b/search/searcher/search_docid.go index 2d90ae166..720fd3233 100644 --- a/search/searcher/search_docid.go +++ b/search/searcher/search_docid.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "reflect" "github.com/blevesearch/bleve/v2/search" @@ -37,7 +38,7 @@ type DocIDSearcher struct { count int } -func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, +func NewDocIDSearcher(ctx context.Context, indexReader index.IndexReader, ids []string, boost float64, options search.SearcherOptions) (searcher *DocIDSearcher, err error) { reader, err := indexReader.DocIDReaderOnly(ids) diff --git a/search/searcher/search_docid_test.go b/search/searcher/search_docid_test.go index 01893d8f6..7727de7ff 100644 --- a/search/searcher/search_docid_test.go +++ b/search/searcher/search_docid_test.go @@ -61,7 +61,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { explainOff := search.SearcherOptions{Explain: false} - searcher, err := NewDocIDSearcher(indexReader, searched, 1.0, explainOff) + searcher, err := NewDocIDSearcher(nil, 
indexReader, searched, 1.0, explainOff) if err != nil { t.Fatal(err) } diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go index 9cab0f78b..4e4dd5eae 100644 --- a/search/searcher/search_filter.go +++ b/search/searcher/search_filter.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "reflect" "github.com/blevesearch/bleve/v2/search" @@ -41,7 +42,7 @@ type FilteringSearcher struct { accept FilterFunc } -func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher { +func NewFilteringSearcher(ctx context.Context, s search.Searcher, filter FilterFunc) *FilteringSearcher { return &FilteringSearcher{ child: s, accept: filter, diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index aab67010c..9423b611e 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "fmt" "github.com/blevesearch/bleve/v2/search" @@ -23,7 +24,7 @@ import ( var MaxFuzziness = 2 -func NewFuzzySearcher(indexReader index.IndexReader, term string, +func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { @@ -44,19 +45,47 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, break } } - candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, + fuzzyCandidates, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm) if err != nil { return nil, err } - return NewMultiTermSearcher(indexReader, candidateTerms, field, + var candidates []string + var dictBytesRead uint64 + if fuzzyCandidates != nil { + candidates = fuzzyCandidates.candidates + dictBytesRead = fuzzyCandidates.bytesRead + } + + if ctx != nil { + reportIOStats(dictBytesRead, ctx) + } + + return NewMultiTermSearcher(ctx, indexReader, candidates, field, boost, options, true) } +type 
fuzzyCandidates struct { + candidates []string + bytesRead uint64 +} + +func reportIOStats(bytesRead uint64, ctx context.Context) { + // The fuzzy, regexp like queries essentially load a dictionary, + // which potentially incurs a cost that must be accounted by + // using the callback to report the value. + statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey) + if statsCallbackFn != nil { + statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead) + } +} + func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, - fuzziness int, field, prefixTerm string) (rv []string, err error) { - rv = make([]string, 0) + fuzziness int, field, prefixTerm string) (rv *fuzzyCandidates, err error) { + rv = &fuzzyCandidates{ + candidates: make([]string, 0), + } // in case of advanced reader implementations directly call // the levenshtein automaton based iterator to collect the @@ -73,12 +102,14 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, }() tfd, err := fieldDict.Next() for err == nil && tfd != nil { - rv = append(rv, tfd.Term) - if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr(field, len(rv)) + rv.candidates = append(rv.candidates, tfd.Term) + if tooManyClauses(len(rv.candidates)) { + return nil, tooManyClausesErr(field, len(rv.candidates)) } tfd, err = fieldDict.Next() } + + rv.bytesRead = fieldDict.BytesRead() return rv, err } @@ -105,13 +136,14 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, var exceeded bool ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse) if !exceeded && ld <= fuzziness { - rv = append(rv, tfd.Term) - if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr(field, len(rv)) + rv.candidates = append(rv.candidates, tfd.Term) + if tooManyClauses(len(rv.candidates)) { + return nil, tooManyClausesErr(field, len(rv.candidates)) } } tfd, err = fieldDict.Next() } + rv.bytesRead = fieldDict.BytesRead() return rv, err } 
diff --git a/search/searcher/search_fuzzy_test.go b/search/searcher/search_fuzzy_test.go index 38cc04523..e6d4c2e42 100644 --- a/search/searcher/search_fuzzy_test.go +++ b/search/searcher/search_fuzzy_test.go @@ -36,22 +36,22 @@ func TestFuzzySearch(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} - fuzzySearcherbeet, err := NewFuzzySearcher(twoDocIndexReader, "beet", 0, 1, "desc", 1.0, explainTrue) + fuzzySearcherbeet, err := NewFuzzySearcher(nil, twoDocIndexReader, "beet", 0, 1, "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - fuzzySearcherdouches, err := NewFuzzySearcher(twoDocIndexReader, "douches", 0, 2, "desc", 1.0, explainTrue) + fuzzySearcherdouches, err := NewFuzzySearcher(nil, twoDocIndexReader, "douches", 0, 2, "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - fuzzySearcheraplee, err := NewFuzzySearcher(twoDocIndexReader, "aplee", 0, 2, "desc", 1.0, explainTrue) + fuzzySearcheraplee, err := NewFuzzySearcher(nil, twoDocIndexReader, "aplee", 0, 2, "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } - fuzzySearcherprefix, err := NewFuzzySearcher(twoDocIndexReader, "water", 3, 2, "desc", 1.0, explainTrue) + fuzzySearcherprefix, err := NewFuzzySearcher(nil, twoDocIndexReader, "water", 3, 2, "desc", 1.0, explainTrue) if err != nil { t.Fatal(err) } @@ -143,12 +143,12 @@ func TestFuzzySearch(t *testing.T) { func TestFuzzySearchLimitErrors(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} - _, err := NewFuzzySearcher(nil, "water", 3, 3, "desc", 1.0, explainTrue) + _, err := NewFuzzySearcher(nil, nil, "water", 3, 3, "desc", 1.0, explainTrue) if err == nil { t.Fatal("`fuzziness exceeds max (2)` error expected") } - _, err = NewFuzzySearcher(nil, "water", 3, -1, "desc", 1.0, explainTrue) + _, err = NewFuzzySearcher(nil, nil, "water", 3, -1, "desc", 1.0, explainTrue) if err == nil { t.Fatal("`invalid fuzziness, negative` error expected") } diff --git a/search/searcher/search_geoboundingbox.go 
b/search/searcher/search_geoboundingbox.go index e438bdd3a..05ca1bf95 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -15,6 +15,8 @@ package searcher import ( + "context" + "github.com/blevesearch/bleve/v2/document" "github.com/blevesearch/bleve/v2/geo" "github.com/blevesearch/bleve/v2/numeric" @@ -27,7 +29,7 @@ type filterFunc func(key []byte) bool var GeoBitsShift1 = geo.GeoBits << 1 var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 -func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, +func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( search.Searcher, error) { @@ -36,7 +38,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, if err == nil { terms := sp.GetQueryTokens(geo.NewBoundedRectangle(minLat, minLon, maxLat, maxLon)) - boxSearcher, err := NewMultiTermSearcher(indexReader, + boxSearcher, err := NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, false) if err != nil { return nil, err @@ -47,7 +49,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, return nil, err } - return NewFilteringSearcher(boxSearcher, buildRectFilter(dvReader, + return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat)), nil } } @@ -63,7 +65,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } // do math to produce list of terms needed for this search - onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(nil, 0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field) if err != nil { return nil, err @@ -76,13 +78,13 @@ func NewGeoBoundingBoxSearcher(indexReader 
index.IndexReader, minLon, minLat, } if len(onBoundaryTerms) > 0 { - rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader, + rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(ctx, indexReader, onBoundaryTerms, field, boost, options, false) if err != nil { return nil, err } // add filter to check points near the boundary - onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher, + onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher, buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat)) openedSearchers = append(openedSearchers, onBoundarySearcher) } @@ -90,7 +92,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, var notOnBoundarySearcher search.Searcher if len(notOnBoundaryTerms) > 0 { var err error - notOnBoundarySearcher, err = NewMultiTermSearcherBytes(indexReader, + notOnBoundarySearcher, err = NewMultiTermSearcherBytes(ctx, indexReader, notOnBoundaryTerms, field, boost, options, false) if err != nil { cleanupOpenedSearchers() @@ -100,7 +102,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } if onBoundarySearcher != nil && notOnBoundarySearcher != nil { - rv, err := NewDisjunctionSearcher(indexReader, + rv, err := NewDisjunctionSearcher(ctx, indexReader, []search.Searcher{ onBoundarySearcher, notOnBoundarySearcher, @@ -125,12 +127,12 @@ var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 type closeFunc func() error -func ComputeGeoRange(term uint64, shift uint, +func ComputeGeoRange(ctx context.Context, term uint64, shift uint, sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, indexReader index.IndexReader, field string) ( onBoundary [][]byte, notOnBoundary [][]byte, err error) { - isIndexed, closeF, err := buildIsIndexedFunc(indexReader, field) + isIndexed, closeF, err := buildIsIndexedFunc(ctx, indexReader, field) if closeF != nil { defer func() { cerr := closeF() @@ -156,7 +158,7 @@ func ComputeGeoRange(term 
uint64, shift uint, return grc.onBoundary, grc.notOnBoundary, nil } -func buildIsIndexedFunc(indexReader index.IndexReader, field string) (isIndexed filterFunc, closeF closeFunc, err error) { +func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, field string) (isIndexed filterFunc, closeF closeFunc, err error) { if irr, ok := indexReader.(index.IndexReaderContains); ok { fieldDict, err := irr.FieldDictContains(field) if err != nil { @@ -179,7 +181,7 @@ func buildIsIndexedFunc(indexReader index.IndexReader, field string) (isIndexed } } else if indexReader != nil { isIndexed = func(term []byte) bool { - reader, err := indexReader.TermFieldReader(term, field, false, false, false) + reader, err := indexReader.TermFieldReader(ctx, term, field, false, false, false) if err != nil || reader == nil { return false } diff --git a/search/searcher/search_geoboundingbox_test.go b/search/searcher/search_geoboundingbox_test.go index c6d45a7c4..5f1907967 100644 --- a/search/searcher/search_geoboundingbox_test.go +++ b/search/searcher/search_geoboundingbox_test.go @@ -73,7 +73,7 @@ func TestGeoBoundingBox(t *testing.T) { func testGeoBoundingBoxSearch(i index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string) ([]string, error) { var rv []string - gbs, err := NewGeoBoundingBoxSearcher(i, minLon, minLat, maxLon, maxLat, field, 1.0, search.SearcherOptions{}, true) + gbs, err := NewGeoBoundingBoxSearcher(nil, i, minLon, minLat, maxLon, maxLat, field, 1.0, search.SearcherOptions{}, true) if err != nil { return nil, err } @@ -184,7 +184,7 @@ func TestComputeGeoRange(t *testing.T) { } for testi, test := range tests { - onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(nil, 0, GeoBitsShift1Minus1, -1.0*test.degs, -1.0*test.degs, test.degs, test.degs, true, nil, "") if (err != nil) != (test.err != "") { t.Errorf("test: %+v, err: %v", test, err) @@ -245,7 +245,7 @@ func 
benchmarkComputeGeoRange(b *testing.B, for i := 0; i < b.N; i++ { onBoundaryRes, offBoundaryRes, err := - ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "") + ComputeGeoRange(nil, 0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "") if err != nil { b.Fatalf("expected no err") } diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index 8301fbb42..01ed20929 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -15,13 +15,15 @@ package searcher import ( + "context" + "github.com/blevesearch/bleve/v2/geo" "github.com/blevesearch/bleve/v2/numeric" "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" ) -func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, +func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexReader, centerLon, centerLat, dist float64, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { var rectSearcher search.Searcher @@ -30,7 +32,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, if err == nil { terms := sp.GetQueryTokens(geo.NewPointDistance(centerLat, centerLon, dist)) - rectSearcher, err = NewMultiTermSearcher(indexReader, terms, + rectSearcher, err = NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, false) if err != nil { return nil, err @@ -49,7 +51,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, } // build a searcher for the box - rectSearcher, err = boxSearcher(indexReader, + rectSearcher, err = boxSearcher(ctx, indexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, field, boost, options, false) if err != nil { @@ -63,27 +65,27 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, } // wrap it in a filtering searcher which checks the actual 
distance - return NewFilteringSearcher(rectSearcher, + return NewFilteringSearcher(ctx, rectSearcher, buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil } // boxSearcher builds a searcher for the described bounding box // if the desired box crosses the dateline, it is automatically split into // two boxes joined through a disjunction searcher -func boxSearcher(indexReader index.IndexReader, +func boxSearcher(ctx context.Context, indexReader index.IndexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( search.Searcher, error) { if bottomRightLon < topLeftLon { // cross date line, rewrite as two parts - leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, + leftSearcher, err := NewGeoBoundingBoxSearcher(ctx, indexReader, -180, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options, checkBoundaries) if err != nil { return nil, err } - rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, + rightSearcher, err := NewGeoBoundingBoxSearcher(ctx, indexReader, topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, checkBoundaries) if err != nil { @@ -91,7 +93,7 @@ func boxSearcher(indexReader index.IndexReader, return nil, err } - boxSearcher, err := NewDisjunctionSearcher(indexReader, + boxSearcher, err := NewDisjunctionSearcher(ctx, indexReader, []search.Searcher{leftSearcher, rightSearcher}, 0, options) if err != nil { _ = leftSearcher.Close() @@ -102,7 +104,7 @@ func boxSearcher(indexReader index.IndexReader, } // build geoboundingbox searcher for that bounding box - boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, + boxSearcher, err := NewGeoBoundingBoxSearcher(ctx, indexReader, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options, checkBoundaries) if err != nil { diff --git a/search/searcher/search_geopointdistance_test.go b/search/searcher/search_geopointdistance_test.go 
index 739e8750f..22394348c 100644 --- a/search/searcher/search_geopointdistance_test.go +++ b/search/searcher/search_geopointdistance_test.go @@ -66,7 +66,7 @@ func TestGeoPointDistanceSearcher(t *testing.T) { func testGeoPointDistanceSearch(i index.IndexReader, centerLon, centerLat, dist float64, field string) ([]string, error) { var rv []string - gds, err := NewGeoPointDistanceSearcher(i, centerLon, centerLat, dist, field, 1.0, search.SearcherOptions{}) + gds, err := NewGeoPointDistanceSearcher(nil, i, centerLon, centerLat, dist, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } @@ -112,7 +112,7 @@ func TestGeoPointDistanceCompare(t *testing.T) { compare := func(desc string, minLon, minLat, maxLon, maxLat float64, checkBoundaries bool) { // do math to produce list of terms needed for this search - onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(nil, 0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "") if err != nil { t.Fatal(err) diff --git a/search/searcher/search_geopolygon.go b/search/searcher/search_geopolygon.go index ce523432f..1d6538adf 100644 --- a/search/searcher/search_geopolygon.go +++ b/search/searcher/search_geopolygon.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "fmt" "math" @@ -24,7 +25,7 @@ import ( index "github.com/blevesearch/bleve_index_api" ) -func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, +func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexReader, coordinates []geo.Point, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { if len(coordinates) < 3 { @@ -36,7 +37,7 @@ func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, tp, err := sr.GetSpatialAnalyzerPlugin("s2") if err == nil { terms := tp.GetQueryTokens(geo.NewBoundedPolygon(coordinates)) - rectSearcher, err = NewMultiTermSearcher(indexReader, terms, 
+ rectSearcher, err = NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, false) if err != nil { return nil, err @@ -55,7 +56,7 @@ func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, } // build a searcher for the bounding box on the polygon - rectSearcher, err = boxSearcher(indexReader, + rectSearcher, err = boxSearcher(ctx, indexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, field, boost, options, true) if err != nil { @@ -69,7 +70,7 @@ func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, } // wrap it in a filtering searcher that checks for the polygon inclusivity - return NewFilteringSearcher(rectSearcher, + return NewFilteringSearcher(ctx, rectSearcher, buildPolygonFilter(dvReader, field, coordinates)), nil } diff --git a/search/searcher/search_geopolygon_test.go b/search/searcher/search_geopolygon_test.go index 75d50be21..2b300be3a 100644 --- a/search/searcher/search_geopolygon_test.go +++ b/search/searcher/search_geopolygon_test.go @@ -149,7 +149,7 @@ func TestGeoRectanglePolygon(t *testing.T) { func testGeoPolygonSearch(i index.IndexReader, polygon []geo.Point, field string) ([]string, error) { var rv []string - gbs, err := NewGeoBoundedPolygonSearcher(i, polygon, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoBoundedPolygonSearcher(nil, i, polygon, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 9f03d89cb..d2c6b1c55 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -16,6 +16,7 @@ package searcher import ( "bytes" + "context" "github.com/blevesearch/bleve/v2/geo" "github.com/blevesearch/bleve/v2/search" @@ -23,7 +24,7 @@ import ( "github.com/blevesearch/geo/geojson" ) -func NewGeoShapeSearcher(indexReader index.IndexReader, shape index.GeoJSON, +func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, shape 
index.GeoJSON, relation string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { var err error @@ -41,7 +42,7 @@ func NewGeoShapeSearcher(indexReader index.IndexReader, shape index.GeoJSON, // obtain the query tokens. terms := spatialPlugin.GetQueryTokens(shape) - mSearcher, err := NewMultiTermSearcher(indexReader, terms, + mSearcher, err := NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, false) if err != nil { return nil, err @@ -52,7 +53,7 @@ func NewGeoShapeSearcher(indexReader index.IndexReader, shape index.GeoJSON, return nil, err } - return NewFilteringSearcher(mSearcher, + return NewFilteringSearcher(ctx, mSearcher, buildRelationFilterOnShapes(dvReader, field, relation, shape)), nil } diff --git a/search/searcher/search_geoshape_circle_test.go b/search/searcher/search_geoshape_circle_test.go index 1ff50a5e8..a5d2b562a 100644 --- a/search/searcher/search_geoshape_circle_test.go +++ b/search/searcher/search_geoshape_circle_test.go @@ -201,7 +201,7 @@ func runGeoShapeCircleRelationQuery(relation string, i index.IndexReader, var rv []string s := geo.NewGeoCircle(points, radius) - gbs, err := NewGeoShapeSearcher(i, s, relation, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } diff --git a/search/searcher/search_geoshape_envelope_test.go b/search/searcher/search_geoshape_envelope_test.go index 7d3ff9762..22a53bde6 100644 --- a/search/searcher/search_geoshape_envelope_test.go +++ b/search/searcher/search_geoshape_envelope_test.go @@ -221,7 +221,7 @@ func runGeoShapeEnvelopeRelationQuery(relation string, i index.IndexReader, var rv []string s := geo.NewGeoEnvelope(points) - gbs, err := NewGeoShapeSearcher(i, s, relation, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } diff --git 
a/search/searcher/search_geoshape_geometrycollection_test.go b/search/searcher/search_geoshape_geometrycollection_test.go index 16fca0891..bba53f21b 100644 --- a/search/searcher/search_geoshape_geometrycollection_test.go +++ b/search/searcher/search_geoshape_geometrycollection_test.go @@ -286,7 +286,7 @@ func runGeoShapeGeometryCollectionRelationQuery(relation string, i index.IndexRe return nil, err } - gbs, err := NewGeoShapeSearcher(i, s, relation, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } diff --git a/search/searcher/search_geoshape_linestring_test.go b/search/searcher/search_geoshape_linestring_test.go index bc0e416b7..b222a2813 100644 --- a/search/searcher/search_geoshape_linestring_test.go +++ b/search/searcher/search_geoshape_linestring_test.go @@ -254,7 +254,7 @@ func runGeoShapeLinestringQueryWithRelation(relation string, i index.IndexReader func executeSearch(relation string, i index.IndexReader, s index.GeoJSON, field string) ([]string, error) { var rv []string - gbs, err := NewGeoShapeSearcher(i, s, relation, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } diff --git a/search/searcher/search_geoshape_points_test.go b/search/searcher/search_geoshape_points_test.go index b0efed6c3..adaf465eb 100644 --- a/search/searcher/search_geoshape_points_test.go +++ b/search/searcher/search_geoshape_points_test.go @@ -297,7 +297,7 @@ func runGeoShapePointRelationQuery(relation string, multi bool, s = geo.NewGeoJsonPoint(points[0]) } - gbs, err := NewGeoShapeSearcher(i, s, relation, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } diff --git a/search/searcher/search_geoshape_polygon_test.go 
b/search/searcher/search_geoshape_polygon_test.go index 115c9860d..c322ee200 100644 --- a/search/searcher/search_geoshape_polygon_test.go +++ b/search/searcher/search_geoshape_polygon_test.go @@ -300,7 +300,7 @@ func runGeoShapePolygonQueryWithRelation(relation string, i index.IndexReader, var rv []string s := geo.NewGeoJsonPolygon(points) - gbs, err := NewGeoShapeSearcher(i, s, relation, field, 1.0, search.SearcherOptions{}) + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err } @@ -607,7 +607,7 @@ func runGeoShapeMultiPolygonQueryWithRelation(relation string, var rv []string s := geo.NewGeoJsonMultiPolygon(points) - gbs, err := NewGeoShapeSearcher(i, s, relation, + gbs, err := NewGeoShapeSearcher(nil, i, s, relation, field, 1.0, search.SearcherOptions{}) if err != nil { return nil, err diff --git a/search/searcher/search_ip_range.go b/search/searcher/search_ip_range.go index d62f05808..382662062 100644 --- a/search/searcher/search_ip_range.go +++ b/search/searcher/search_ip_range.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "net" "github.com/blevesearch/bleve/v2/search" @@ -39,7 +40,7 @@ func netLimits(n *net.IPNet) (lo net.IP, hi net.IP) { return lo, hi } -func NewIPRangeSearcher(indexReader index.IndexReader, ipNet *net.IPNet, +func NewIPRangeSearcher(ctx context.Context, indexReader index.IndexReader, ipNet *net.IPNet, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { @@ -63,5 +64,5 @@ func NewIPRangeSearcher(indexReader index.IndexReader, ipNet *net.IPNet, return nil, err } - return NewMultiTermSearcher(indexReader, terms, field, boost, options, true) + return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) } diff --git a/search/searcher/search_match_all.go b/search/searcher/search_match_all.go index db8306eee..57d8d0727 100644 --- a/search/searcher/search_match_all.go +++ b/search/searcher/search_match_all.go 
@@ -15,6 +15,7 @@ package searcher import ( + "context" "reflect" "github.com/blevesearch/bleve/v2/search" @@ -37,7 +38,7 @@ type MatchAllSearcher struct { count uint64 } -func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) { +func NewMatchAllSearcher(ctx context.Context, indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) { reader, err := indexReader.DocIDReaderAll() if err != nil { return nil, err @@ -48,6 +49,7 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s return nil, err } scorer := scorer.NewConstantScorer(1.0, boost, options) + return &MatchAllSearcher{ indexReader: indexReader, reader: reader, diff --git a/search/searcher/search_match_all_test.go b/search/searcher/search_match_all_test.go index 819d5463f..74e72c328 100644 --- a/search/searcher/search_match_all_test.go +++ b/search/searcher/search_match_all_test.go @@ -36,12 +36,12 @@ func TestMatchAllSearch(t *testing.T) { explainTrue := search.SearcherOptions{Explain: true} - allSearcher, err := NewMatchAllSearcher(twoDocIndexReader, 1.0, explainTrue) + allSearcher, err := NewMatchAllSearcher(nil, twoDocIndexReader, 1.0, explainTrue) if err != nil { t.Fatal(err) } - allSearcher2, err := NewMatchAllSearcher(twoDocIndexReader, 1.2, explainTrue) + allSearcher2, err := NewMatchAllSearcher(nil, twoDocIndexReader, 1.2, explainTrue) if err != nil { t.Fatal(err) } diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index 523bf4b55..913f99f55 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -15,41 +15,43 @@ package searcher import ( + "context" "fmt" + "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" ) -func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, +func NewMultiTermSearcher(ctx context.Context, 
indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { if tooManyClauses(len(terms)) { if optionsDisjunctionOptimizable(options) { - return optimizeMultiTermSearcher(indexReader, terms, field, boost, options) + return optimizeMultiTermSearcher(ctx, indexReader, terms, field, boost, options) } if limit { return nil, tooManyClausesErr(field, len(terms)) } } - qsearchers, err := makeBatchSearchers(indexReader, terms, field, boost, options) + qsearchers, err := makeBatchSearchers(ctx, indexReader, terms, field, boost, options) if err != nil { return nil, err } // build disjunction searcher of these ranges - return newMultiTermSearcherInternal(indexReader, qsearchers, field, boost, + return newMultiTermSearcherInternal(ctx, indexReader, qsearchers, field, boost, options, limit) } -func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, +func NewMultiTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, terms [][]byte, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { if tooManyClauses(len(terms)) { if optionsDisjunctionOptimizable(options) { - return optimizeMultiTermSearcherBytes(indexReader, terms, field, boost, options) + return optimizeMultiTermSearcherBytes(ctx, indexReader, terms, field, boost, options) } if limit { @@ -57,23 +59,23 @@ func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, } } - qsearchers, err := makeBatchSearchersBytes(indexReader, terms, field, boost, options) + qsearchers, err := makeBatchSearchersBytes(ctx, indexReader, terms, field, boost, options) if err != nil { return nil, err } // build disjunction searcher of these ranges - return newMultiTermSearcherInternal(indexReader, qsearchers, field, boost, + return newMultiTermSearcherInternal(ctx, indexReader, qsearchers, field, boost, options, limit) } -func 
newMultiTermSearcherInternal(indexReader index.IndexReader, +func newMultiTermSearcherInternal(ctx context.Context, indexReader index.IndexReader, searchers []search.Searcher, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { // build disjunction searcher of these ranges - searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options, + searcher, err := newDisjunctionSearcher(ctx, indexReader, searchers, 0, options, limit) if err != nil { for _, s := range searchers { @@ -85,7 +87,7 @@ func newMultiTermSearcherInternal(indexReader index.IndexReader, return searcher, nil } -func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, +func optimizeMultiTermSearcher(ctx context.Context, indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { var finalSearcher search.Searcher @@ -98,7 +100,7 @@ func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, batchTerms = terms terms = nil } - batch, err := makeBatchSearchers(indexReader, batchTerms, field, boost, options) + batch, err := makeBatchSearchers(ctx, indexReader, batchTerms, field, boost, options) if err != nil { return nil, err } @@ -112,7 +114,7 @@ func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, } } } - finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned", + finalSearcher, err = optimizeCompositeSearcher(ctx, "disjunction:unadorned", indexReader, batch, options) // all searchers in batch should be closed, regardless of error or optimization failure // either we're returning, or continuing and only finalSearcher is needed for next loop @@ -127,7 +129,7 @@ func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, return finalSearcher, nil } -func makeBatchSearchers(indexReader index.IndexReader, terms []string, field string, +func makeBatchSearchers(ctx context.Context, 
indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions) ([]search.Searcher, error) { qsearchers := make([]search.Searcher, len(terms)) @@ -140,7 +142,7 @@ func makeBatchSearchers(indexReader index.IndexReader, terms []string, field str } for i, term := range terms { var err error - qsearchers[i], err = NewTermSearcher(indexReader, term, field, boost, options) + qsearchers[i], err = NewTermSearcher(ctx, indexReader, term, field, boost, options) if err != nil { qsearchersClose() return nil, err @@ -149,7 +151,7 @@ func makeBatchSearchers(indexReader index.IndexReader, terms []string, field str return qsearchers, nil } -func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, +func optimizeMultiTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, terms [][]byte, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { @@ -163,7 +165,7 @@ func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byt batchTerms = terms terms = nil } - batch, err := makeBatchSearchersBytes(indexReader, batchTerms, field, boost, options) + batch, err := makeBatchSearchersBytes(ctx, indexReader, batchTerms, field, boost, options) if err != nil { return nil, err } @@ -177,7 +179,7 @@ func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byt } } } - finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned", + finalSearcher, err = optimizeCompositeSearcher(ctx, "disjunction:unadorned", indexReader, batch, options) // all searchers in batch should be closed, regardless of error or optimization failure // either we're returning, or continuing and only finalSearcher is needed for next loop @@ -192,7 +194,7 @@ func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byt return finalSearcher, nil } -func makeBatchSearchersBytes(indexReader index.IndexReader, terms [][]byte, field string, +func 
makeBatchSearchersBytes(ctx context.Context, indexReader index.IndexReader, terms [][]byte, field string, boost float64, options search.SearcherOptions) ([]search.Searcher, error) { qsearchers := make([]search.Searcher, len(terms)) @@ -205,7 +207,7 @@ func makeBatchSearchersBytes(indexReader index.IndexReader, terms [][]byte, fiel } for i, term := range terms { var err error - qsearchers[i], err = NewTermSearcherBytes(indexReader, term, field, boost, options) + qsearchers[i], err = NewTermSearcherBytes(ctx, indexReader, term, field, boost, options) if err != nil { qsearchersClose() return nil, err diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index 6ab5147be..68728c94c 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -16,6 +16,7 @@ package searcher import ( "bytes" + "context" "math" "sort" @@ -24,7 +25,7 @@ import ( index "github.com/blevesearch/bleve_index_api" ) -func NewNumericRangeSearcher(indexReader index.IndexReader, +func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { // account for unbounded edges @@ -55,6 +56,7 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, } var fieldDict index.FieldDictContains + var dictBytesRead uint64 var isIndexed filterFunc var err error if irr, ok := indexReader.(index.IndexReaderContains); ok { @@ -67,6 +69,8 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, found, err := fieldDict.Contains(term) return err == nil && found } + + dictBytesRead = fieldDict.BytesRead() } // FIXME hard-coded precision, should match field declaration @@ -81,10 +85,16 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, } if len(terms) < 1 { + // reporting back the IO stats with respect to the dictionary + // loaded, using the context + 
if ctx != nil { + reportIOStats(dictBytesRead, ctx) + } + // cannot return MatchNoneSearcher because of interaction with // commit f391b991c20f02681bacd197afc6d8aed444e132 - return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, - true) + return NewMultiTermSearcherBytes(ctx, indexReader, terms, field, + boost, options, true) } // for upside_down @@ -99,8 +109,12 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, return nil, tooManyClausesErr(field, len(terms)) } - return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, - true) + if ctx != nil { + reportIOStats(dictBytesRead, ctx) + } + + return NewMultiTermSearcherBytes(ctx, indexReader, terms, field, + boost, options, true) } func filterCandidateTerms(indexReader index.IndexReader, diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index c262fd914..087ad768c 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "fmt" "math" "reflect" @@ -63,22 +64,22 @@ func (s *PhraseSearcher) Size() int { return sizeInBytes } -func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) { +func NewPhraseSearcher(ctx context.Context, indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) { // turn flat terms []string into [][]string mterms := make([][]string, len(terms)) for i, term := range terms { mterms[i] = []string{term} } - return NewMultiPhraseSearcher(indexReader, mterms, field, options) + return NewMultiPhraseSearcher(ctx, indexReader, mterms, field, options) } -func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) { +func NewMultiPhraseSearcher(ctx context.Context, indexReader index.IndexReader, terms [][]string, field 
string, options search.SearcherOptions) (*PhraseSearcher, error) { options.IncludeTermVectors = true var termPositionSearchers []search.Searcher for _, termPos := range terms { if len(termPos) == 1 && termPos[0] != "" { // single term - ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options) + ts, err := NewTermSearcher(ctx, indexReader, termPos[0], field, 1.0, options) if err != nil { // close any searchers already opened for _, ts := range termPositionSearchers { @@ -94,7 +95,7 @@ func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, fie if term == "" { continue } - ts, err := NewTermSearcher(indexReader, term, field, 1.0, options) + ts, err := NewTermSearcher(ctx, indexReader, term, field, 1.0, options) if err != nil { // close any searchers already opened for _, ts := range termPositionSearchers { @@ -104,7 +105,7 @@ func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, fie } termSearchers = append(termSearchers, ts) } - disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options) + disjunction, err := NewDisjunctionSearcher(ctx, indexReader, termSearchers, 1, options) if err != nil { // close any searchers already opened for _, ts := range termPositionSearchers { @@ -116,7 +117,7 @@ func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, fie } } - mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options) + mustSearcher, err := NewConjunctionSearcher(ctx, indexReader, termPositionSearchers, options) if err != nil { // close any searchers already opened for _, ts := range termPositionSearchers { diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index e83ea35e3..f74d65619 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -36,7 +36,7 @@ func TestPhraseSearch(t *testing.T) { }() soptions := search.SearcherOptions{Explain: true, 
IncludeTermVectors: true} - phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, []string{"angst", "beer"}, "desc", soptions) + phraseSearcher, err := NewPhraseSearcher(nil, twoDocIndexReader, []string{"angst", "beer"}, "desc", soptions) if err != nil { t.Fatal(err) } @@ -130,7 +130,7 @@ func TestMultiPhraseSearch(t *testing.T) { if err != nil { t.Error(err) } - searcher, err := NewMultiPhraseSearcher(reader, test.phrase, "desc", soptions) + searcher, err := NewMultiPhraseSearcher(nil, reader, test.phrase, "desc", soptions) if err != nil { t.Error(err) } diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index 81b1cf734..b419d5470 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "regexp" "github.com/blevesearch/bleve/v2/search" @@ -34,7 +35,7 @@ type Regexp interface { // NewRegexpStringSearcher is similar to NewRegexpSearcher, but // additionally optimizes for index readers that handle regexp's. 
-func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string, +func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader, pattern string, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { ir, ok := indexReader.(index.IndexReaderRegexp) @@ -44,7 +45,7 @@ func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string, return nil, err } - return NewRegexpSearcher(indexReader, r, field, boost, options) + return NewRegexpSearcher(ctx, indexReader, r, field, boost, options) } fieldDict, err := ir.FieldDictRegexp(field, pattern) @@ -68,7 +69,7 @@ func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string, return nil, err } - return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, + return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost, options, true) } @@ -77,31 +78,47 @@ func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string, // matching the entire term. The provided regexp SHOULD NOT start with ^ // or end with $ as this can intefere with the implementation. Separately, // matches will be checked to ensure they match the entire term. 
-func NewRegexpSearcher(indexReader index.IndexReader, pattern Regexp, +func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, pattern Regexp, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { var candidateTerms []string - + var regexpCandidates *regexpCandidates prefixTerm, complete := pattern.LiteralPrefix() if complete { // there is no pattern candidateTerms = []string{prefixTerm} } else { var err error - candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, + regexpCandidates, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm) if err != nil { return nil, err } } + var dictBytesRead uint64 + if regexpCandidates != nil { + candidateTerms = regexpCandidates.candidates + dictBytesRead = regexpCandidates.bytesRead + } + + if ctx != nil { + reportIOStats(dictBytesRead, ctx) + } - return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, + return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost, options, true) } +type regexpCandidates struct { + candidates []string + bytesRead uint64 +} + func findRegexpCandidateTerms(indexReader index.IndexReader, - pattern Regexp, field, prefixTerm string) (rv []string, err error) { - rv = make([]string, 0) + pattern Regexp, field, prefixTerm string) (rv *regexpCandidates, err error) { + rv = &regexpCandidates{ + candidates: make([]string, 0), + } var fieldDict index.FieldDict if len(prefixTerm) > 0 { fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) @@ -119,13 +136,13 @@ func findRegexpCandidateTerms(indexReader index.IndexReader, for err == nil && tfd != nil { matchPos := pattern.FindStringIndex(tfd.Term) if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) { - rv = append(rv, tfd.Term) - if tooManyClauses(len(rv)) { - return rv, tooManyClausesErr(field, len(rv)) + rv.candidates = append(rv.candidates, tfd.Term) + if tooManyClauses(len(rv.candidates)) { + return
rv, tooManyClausesErr(field, len(rv.candidates)) } } tfd, err = fieldDict.Next() } - + rv.bytesRead = fieldDict.BytesRead() return rv, err } diff --git a/search/searcher/search_regexp_test.go b/search/searcher/search_regexp_test.go index 9f65fd2ea..863547edc 100644 --- a/search/searcher/search_regexp_test.go +++ b/search/searcher/search_regexp_test.go @@ -76,7 +76,7 @@ func searcherMaker(t *testing.T, ir index.IndexReader, re, field string) search. t.Fatal(err) } - regexpSearcher, err := NewRegexpSearcher(ir, pattern, field, 1.0, + regexpSearcher, err := NewRegexpSearcher(nil, ir, pattern, field, 1.0, search.SearcherOptions{Explain: true}) if err != nil { t.Fatal(err) @@ -86,7 +86,7 @@ func searcherMaker(t *testing.T, ir index.IndexReader, re, field string) search. } func searcherStringMaker(t *testing.T, ir index.IndexReader, re, field string) search.Searcher { - regexpSearcher, err := NewRegexpStringSearcher(ir, re, field, 1.0, + regexpSearcher, err := NewRegexpStringSearcher(nil, ir, re, field, 1.0, search.SearcherOptions{Explain: true}) if err != nil { t.Fatal(err) diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 55c18d163..db18e5376 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -15,6 +15,7 @@ package searcher import ( + "context" "reflect" "github.com/blevesearch/bleve/v2/search" @@ -37,13 +38,13 @@ type TermSearcher struct { tfd index.TermFieldDoc } -func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options) +func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { + return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options) } -func NewTermSearcherBytes(indexReader index.IndexReader, 
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { +func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { needFreqNorm := options.Score != "none" - reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) + reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { return nil, err } diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go index a01b18690..89f836a50 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -15,11 +15,13 @@ package searcher import ( + "context" + "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" ) -func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, +func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, prefix string, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { // find the terms with this prefix @@ -46,5 +48,9 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, return nil, err } - return NewMultiTermSearcher(indexReader, terms, field, boost, options, true) + if ctx != nil { + reportIOStats(fieldDict.BytesRead(), ctx) + } + + return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) } diff --git a/search/searcher/search_term_range.go b/search/searcher/search_term_range.go index 5ef58f76f..a2fb4e993 100644 --- a/search/searcher/search_term_range.go +++ b/search/searcher/search_term_range.go @@ -15,11 +15,13 @@ package searcher import ( + "context" + "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" ) -func NewTermRangeSearcher(indexReader index.IndexReader, 
+func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader, min, max []byte, inclusiveMin, inclusiveMax *bool, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { @@ -81,5 +83,9 @@ func NewTermRangeSearcher(indexReader index.IndexReader, terms = terms[:len(terms)-1] } - return NewMultiTermSearcher(indexReader, terms, field, boost, options, true) + if ctx != nil { + reportIOStats(fieldDict.BytesRead(), ctx) + } + + return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) } diff --git a/search/searcher/search_term_range_test.go b/search/searcher/search_term_range_test.go index 126e03ee4..0d199cb8c 100644 --- a/search/searcher/search_term_range_test.go +++ b/search/searcher/search_term_range_test.go @@ -15,13 +15,14 @@ package searcher import ( - "github.com/blevesearch/bleve/v2/index/scorch" "io/ioutil" "os" "reflect" "sort" "testing" + "github.com/blevesearch/bleve/v2/index/scorch" + "github.com/blevesearch/bleve/v2/search" ) @@ -174,7 +175,7 @@ func TestTermRangeSearch(t *testing.T) { for _, test := range tests { - searcher, err := NewTermRangeSearcher(twoDocIndexReader, test.min, test.max, + searcher, err := NewTermRangeSearcher(nil, twoDocIndexReader, test.min, test.max, &test.inclusiveMin, &test.inclusiveMax, test.field, 1.0, search.SearcherOptions{Explain: true}) if err != nil { t.Fatal(err) @@ -232,7 +233,7 @@ func TestTermRangeSearchTooManyTerms(t *testing.T) { var want = []string{"1", "3", "4", "5"} var truth = true - searcher, err := NewTermRangeSearcher(scorchReader, []byte("bobert"), []byte("ravi"), + searcher, err := NewTermRangeSearcher(nil, scorchReader, []byte("bobert"), []byte("ravi"), &truth, &truth, "name", 1.0, search.SearcherOptions{Score: "none", IncludeTermVectors: false}) if err != nil { t.Fatal(err) diff --git a/search/searcher/search_term_test.go b/search/searcher/search_term_test.go index d7a05359b..0c08d4c54 100644 --- a/search/searcher/search_term_test.go +++ 
b/search/searcher/search_term_test.go @@ -118,7 +118,7 @@ func TestTermSearcher(t *testing.T) { } }() - searcher, err := NewTermSearcher(indexReader, queryTerm, queryField, queryBoost, queryExplain) + searcher, err := NewTermSearcher(nil, indexReader, queryTerm, queryField, queryBoost, queryExplain) if err != nil { t.Fatal(err) } diff --git a/test/versus_test.go b/test/versus_test.go index e96eae6ad..795e37855 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -359,6 +359,8 @@ func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB ble resA.Hits = nil resB.Hits = nil + resA.BytesRead = 0 + resB.BytesRead = 0 if !reflect.DeepEqual(resA, resB) { resAj, _ := json.Marshal(resA) @@ -387,9 +389,7 @@ func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch { hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0 hit.IndexInternalID = nil hit.HitNumber = 0 - // Ignoring the BytesRead value, since it doesn't have - // relevance in this type of test - hit.BytesRead = 0 + rv[hit.ID] = hit }