Skip to content

Commit

Permalink
implemented prefix search
Browse files Browse the repository at this point in the history
  • Loading branch information
mschoch committed Aug 7, 2014
1 parent b16c1d7 commit 292af78
Show file tree
Hide file tree
Showing 8 changed files with 381 additions and 1 deletion.
8 changes: 8 additions & 0 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ type Index interface {
TermFieldReader(term []byte, field string) (TermFieldReader, error)
DocIdReader(start, end string) (DocIdReader, error)

FieldReader(field string, startTerm []byte, endTerm []byte) (FieldReader, error)

DocCount() uint64

Document(id string) (*document.Document, error)
Expand All @@ -41,6 +43,7 @@ type TermFieldVector struct {
}

type TermFieldDoc struct {
Term string
ID string
Freq uint64
Norm float64
Expand All @@ -54,6 +57,11 @@ type TermFieldReader interface {
Close()
}

type FieldReader interface {
Next() (*TermFieldDoc, error)
Close()
}

type DocIdReader interface {
Next() (string, error)
Advance(ID string) (string, error)
Expand Down
87 changes: 87 additions & 0 deletions index/upside_down/field_reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down

import (
"bytes"
"fmt"

"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/index/store"
)

type UpsideDownCouchFieldReader struct {
index *UpsideDownCouch
iterator store.KVIterator
endKey []byte
field uint16
}

func newUpsideDownCouchFieldReader(index *UpsideDownCouch, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldReader, error) {

startRow := NewTermFrequencyRow(startTerm, field, "", 0, 0)
startKey := startRow.ScanPrefixForFieldTermPrefix()

endKey := NewTermFrequencyRow(endTerm, field, "", 0, 0).Key()

it := index.store.Iterator(startKey)

return &UpsideDownCouchFieldReader{
index: index,
iterator: it,
field: field,
endKey: endKey,
}, nil

}

func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) {
key, val, valid := r.iterator.Current()
if !valid {
return nil, nil
}

// past end term
if bytes.Compare(key, r.endKey) > 0 {
return nil, nil
}

currRow, err := NewTermFrequencyRowKV(key, val)
if err != nil {
return nil, fmt.Errorf("unexpected error parsing term freq row kv: %v", err)
}
rv := index.TermFieldDoc{
Term: string(currRow.term),
Freq: currRow.freq,
}
// advance the iterator to the next term
// by using invalid doc id (higher sorting)
nextTerm := incrementBytes(currRow.term)
nextRow := NewTermFrequencyRow(nextTerm, r.field, "", 0, 0)
r.iterator.Seek(nextRow.ScanPrefixForFieldTermPrefix())
return &rv, nil

}

func (r *UpsideDownCouchFieldReader) Close() {
r.iterator.Close()
}

func incrementBytes(in []byte) []byte {
rv := make([]byte, len(in))
copy(rv, in)
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
if rv[i] != 0 {
// didnt' overflow, so stop
break
}
}
return rv
}
113 changes: 113 additions & 0 deletions index/upside_down/field_reader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package upside_down

import (
"os"
"reflect"
"testing"

"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index/store/leveldb"
)

func TestIndexFieldReader(t *testing.T) {
defer os.RemoveAll("test")

store, err := leveldb.Open("test", true)
idx := NewUpsideDownCouch(store)
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
}
defer idx.Close()

var expectedCount uint64 = 0
doc := document.NewDocument("1")
doc.AddField(document.NewTextField("name", []byte("test")))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
expectedCount += 1

doc = document.NewDocument("2")
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
doc.AddField(document.NewTextFieldCustom("prefix", []byte("bob cat cats catting dog doggy zoo"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
expectedCount += 1

reader, err := idx.FieldReader("name", nil, nil)
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer reader.Close()

termCount := 0
curr, err := reader.Next()
for err == nil && curr != nil {
termCount++
if curr.Term != "test" {
t.Errorf("expected term to be 'test', got '%s'", curr.Term)
}
curr, err = reader.Next()
}
if termCount != 1 {
t.Errorf("expected 1 term for this field, got %d", termCount)
}

reader, err = idx.FieldReader("desc", nil, nil)
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer reader.Close()

termCount = 0
terms := make([]string, 0)
curr, err = reader.Next()
for err == nil && curr != nil {
termCount++
terms = append(terms, curr.Term)
curr, err = reader.Next()
}
if termCount != 3 {
t.Errorf("expected 3 term for this field, got %d", termCount)
}
expectedTerms := []string{"eat", "more", "rice"}
if !reflect.DeepEqual(expectedTerms, terms) {
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
}

// test use case for prefix
reader, err = idx.FieldReader("prefix", []byte("cat"), []byte("cat"))
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer reader.Close()

termCount = 0
terms = make([]string, 0)
curr, err = reader.Next()
for err == nil && curr != nil {
termCount++
terms = append(terms, curr.Term)
curr, err = reader.Next()
}
if termCount != 3 {
t.Errorf("expected 3 term for this field, got %d", termCount)
}
expectedTerms = []string{"cats", "catting", "cat"}
if !reflect.DeepEqual(expectedTerms, terms) {
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
}
}
24 changes: 24 additions & 0 deletions index/upside_down/row.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,30 @@ type TermFrequencyRow struct {
vectors []*TermVector
}

func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
buf := make([]byte, 3)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
return buf
}

func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
buf := make([]byte, 3+len(tfr.term))
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
copy(buf[3:], tfr.term)
return buf
}

func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
buf := make([]byte, 3+len(tfr.term)+1)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
buf[3+termLen] = BYTE_SEPARATOR
return buf
}

func (tfr *TermFrequencyRow) Key() []byte {
buf := make([]byte, 3+len(tfr.term)+1+len(tfr.doc))
buf[0] = 't'
Expand Down
10 changes: 9 additions & 1 deletion index/upside_down/upside_down.go
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,15 @@ func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (inde
if fieldExists {
return newUpsideDownCouchTermFieldReader(udc, term, uint16(fieldIndex))
}
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, 0)
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, ^uint16(0))
}

func (udc *UpsideDownCouch) FieldReader(fieldName string, startTerm []byte, endTerm []byte) (index.FieldReader, error) {
fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
if fieldExists {
return newUpsideDownCouchFieldReader(udc, uint16(fieldIndex), startTerm, endTerm)
}
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, ^uint16(0))
}

func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, error) {
Expand Down
9 changes: 9 additions & 0 deletions query.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,14 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasPrefix := tmp["prefix"]
if hasPrefix {
var rv PrefixQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("Unrecognized query")
}
56 changes: 56 additions & 0 deletions query_prefix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve

import (
"github.com/couchbaselabs/bleve/search"
)

type PrefixQuery struct {
Prefix string `json:"prefix"`
FieldVal string `json:"field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
}

func NewPrefixQuery(prefix string) *PrefixQuery {
return &PrefixQuery{
Prefix: prefix,
BoostVal: 1.0,
}
}

func (q *PrefixQuery) Boost() float64 {
return q.BoostVal
}

func (q *PrefixQuery) SetBoost(b float64) *PrefixQuery {
q.BoostVal = b
return q
}

func (q *PrefixQuery) Field() string {
return q.FieldVal
}

func (q *PrefixQuery) SetField(f string) *PrefixQuery {
q.FieldVal = f
return q
}

func (q *PrefixQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = i.m.defaultField()
}
return search.NewTermPrefixSearcher(i.i, q.Prefix, field, q.BoostVal, explain)
}

func (q *PrefixQuery) Validate() error {
return nil
}
Loading

0 comments on commit 292af78

Please sign in to comment.