Skip to content

Commit

Permalink
added regexp and wildcard queries
Browse files Browse the repository at this point in the history
  • Loading branch information
mschoch committed Mar 11, 2015
1 parent 183fcd4 commit a41f229
Show file tree
Hide file tree
Showing 5 changed files with 403 additions and 0 deletions.
18 changes: 18 additions & 0 deletions query.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,5 +189,23 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasRegexp := tmp["regexp"]
if hasRegexp {
var rv regexpQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasWildcard := tmp["wildcard"]
if hasWildcard {
var rv wildcardQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, ErrorUnknownQueryType
}
75 changes: 75 additions & 0 deletions query_regexp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package bleve

import (
"regexp"

"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searchers"
)

// regexpQuery is a query whose pattern is a regular expression.
// Its exported fields map to the "regexp", "field" and "boost" JSON keys.
type regexpQuery struct {
// Regexp is the source text of the regular expression.
Regexp string `json:"regexp"`
// FieldVal is the field to search; when empty, Searcher falls back to
// the mapping's DefaultField.
FieldVal string `json:"field,omitempty"`
// BoostVal is the boost handed to the searcher built by Searcher.
BoostVal float64 `json:"boost,omitempty"`
// compiled caches the compiled form of Regexp. It is set by Validate
// and, if still nil, by Searcher. Being unexported it is not part of
// the JSON form.
compiled *regexp.Regexp
}

// NewRegexpQuery returns a Query that finds documents
// containing at least one term matched by the given
// regular expression. The new query has a boost of 1.0
// and no field set.
func NewRegexpQuery(regexp string) *regexpQuery {
	q := new(regexpQuery)
	q.Regexp = regexp
	q.BoostVal = 1.0
	return q
}

// Boost returns the boost value stored on the query.
func (q *regexpQuery) Boost() float64 {
return q.BoostVal
}

// SetBoost stores b as the query's boost and returns the
// same query so that calls can be chained.
func (q *regexpQuery) SetBoost(b float64) Query {
q.BoostVal = b
return q
}

// Field returns the field name stored on the query; it is
// empty when no field has been set.
func (q *regexpQuery) Field() string {
return q.FieldVal
}

// SetField stores f as the field to search and returns the
// same query so that calls can be chained.
func (q *regexpQuery) SetField(f string) Query {
q.FieldVal = f
return q
}

// Searcher builds the searcher that executes this query against i.
//
// The query's field is used when set; otherwise the mapping's
// DefaultField is used. The regular expression is compiled here only
// when no compiled form has been cached yet (for example by Validate).
//
// On failure the returned Searcher is an untyped nil. Returning the
// result of NewRegexpSearcher directly would wrap a nil *RegexpSearcher
// in a non-nil search.Searcher interface value.
func (q *regexpQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultField
	}
	if q.compiled == nil {
		var err error
		q.compiled, err = regexp.Compile(q.Regexp)
		if err != nil {
			return nil, err
		}
	}

	searcher, err := searchers.NewRegexpSearcher(i, q.compiled, field, q.BoostVal, explain)
	if err != nil {
		return nil, err
	}
	return searcher, nil
}

// Validate compiles the query's regular expression, stores the
// result on the query as its compiled form, and returns the
// compile error, if any.
func (q *regexpQuery) Validate() error {
	compiled, err := regexp.Compile(q.Regexp)
	q.compiled = compiled
	return err
}
102 changes: 102 additions & 0 deletions query_wildcard.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package bleve

import (
"regexp"
"strings"

"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searchers"
)

// wildcardRegexpReplacer translates a wildcard pattern into regular
// expression syntax. The two wildcard characters become their regexp
// equivalents and every other character that is special in a regexp
// is backslash-escaped so that it matches itself.
var wildcardRegexpReplacer = strings.NewReplacer(
	// wildcard characters and their regexp equivalents
	`*`, `.*`,
	`?`, `.`,
	// characters that are literal in a wildcard but
	// special in a regexp, and so must be escaped
	`\`, `\\`,
	`|`, `\|`,
	`.`, `\.`,
	`+`, `\+`,
	`^`, `\^`,
	`$`, `\$`,
	`(`, `\(`,
	`)`, `\)`,
	`[`, `\[`,
	`]`, `\]`,
	`{`, `\{`,
	`}`, `\}`,
)

// wildcardQuery is a query whose pattern is a wildcard expression.
// Its exported fields map to the "wildcard", "field" and "boost" JSON keys.
type wildcardQuery struct {
// Wildcard is the wildcard pattern as supplied by the caller.
Wildcard string `json:"wildcard"`
// FieldVal is the field to search; when empty, Searcher falls back to
// the mapping's DefaultField.
FieldVal string `json:"field,omitempty"`
// BoostVal is the boost handed to the searcher built by Searcher.
BoostVal float64 `json:"boost,omitempty"`
// compiled caches the regular expression produced from Wildcard by
// convertToRegexp. It is set by Validate and, if still nil, by Searcher.
compiled *regexp.Regexp
}

// NewWildcardQuery returns a Query that finds documents
// containing at least one term matched by the given
// wildcard pattern. Within the pattern, '*' stands for
// any run of zero or more characters and '?' stands for
// exactly one character. The new query has a boost of
// 1.0 and no field set.
func NewWildcardQuery(wildcard string) *wildcardQuery {
	q := new(wildcardQuery)
	q.Wildcard = wildcard
	q.BoostVal = 1.0
	return q
}

// Boost returns the boost value stored on the query.
func (q *wildcardQuery) Boost() float64 {
return q.BoostVal
}

// SetBoost stores b as the query's boost and returns the
// same query so that calls can be chained.
func (q *wildcardQuery) SetBoost(b float64) Query {
q.BoostVal = b
return q
}

// Field returns the field name stored on the query; it is
// empty when no field has been set.
func (q *wildcardQuery) Field() string {
return q.FieldVal
}

// SetField stores f as the field to search and returns the
// same query so that calls can be chained.
func (q *wildcardQuery) SetField(f string) Query {
q.FieldVal = f
return q
}

// Searcher builds the searcher that executes this query against i.
//
// The query's field is used when set; otherwise the mapping's
// DefaultField is used. The wildcard is converted to a regular
// expression here only when no compiled form has been cached yet
// (for example by Validate).
//
// On failure the returned Searcher is an untyped nil. Returning the
// result of NewRegexpSearcher directly would wrap a nil *RegexpSearcher
// in a non-nil search.Searcher interface value.
func (q *wildcardQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultField
	}
	if q.compiled == nil {
		var err error
		q.compiled, err = q.convertToRegexp()
		if err != nil {
			return nil, err
		}
	}

	searcher, err := searchers.NewRegexpSearcher(i, q.compiled, field, q.BoostVal, explain)
	if err != nil {
		return nil, err
	}
	return searcher, nil
}

// Validate converts the wildcard into a regular expression, stores
// the result on the query as its compiled form, and returns the
// conversion error, if any.
func (q *wildcardQuery) Validate() error {
	compiled, err := q.convertToRegexp()
	q.compiled = compiled
	return err
}

// convertToRegexp translates the query's wildcard pattern with
// wildcardRegexpReplacer, wraps the result in "^" and "$", and
// compiles it.
func (q *wildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
	translated := wildcardRegexpReplacer.Replace(q.Wildcard)
	return regexp.Compile("^" + translated + "$")
}
108 changes: 108 additions & 0 deletions search/searchers/search_regexp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) 2015 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.

package searchers

import (
"regexp"

"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)

// RegexpSearcher searches for documents containing terms matched by a
// regular expression. The search methods delegate to a disjunction
// searcher assembled by NewRegexpSearcher.
type RegexpSearcher struct {
// indexReader, pattern, field and explain record the arguments that
// NewRegexpSearcher was called with.
indexReader index.IndexReader
pattern *regexp.Regexp
field string
explain bool
// searcher is the disjunction over the per-term searchers; the
// methods on RegexpSearcher forward to it.
searcher *DisjunctionSearcher
}

// NewRegexpSearcher builds a searcher for documents that contain, in the
// given field, at least one term matched by pattern.
//
// The pattern's literal prefix is used to limit the work. When the whole
// pattern is a literal, that literal is searched as a single term.
// Otherwise the field's term dictionary (restricted to the literal prefix
// when there is one) is enumerated, and every term accepted by
// pattern.MatchString becomes a candidate. One term searcher is created per
// candidate, each given boost, and they are combined in a disjunction
// searcher.
//
// An error from opening or iterating the term dictionary, or from building
// any of the underlying searchers, is returned unchanged.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {
	var candidateTerms []string

	prefixTerm, complete := pattern.LiteralPrefix()
	if complete {
		// the pattern is a plain literal, so it is the only candidate term
		candidateTerms = append(candidateTerms, prefixTerm)
	} else {
		var fieldDict index.FieldDict
		var err error
		if len(prefixTerm) > 0 {
			fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
		} else {
			fieldDict, err = indexReader.FieldDict(field)
		}
		// check before using fieldDict: the error would otherwise be
		// overwritten by the first call to Next below
		if err != nil {
			return nil, err
		}
		// NOTE(review): if index.FieldDict has a Close method the
		// dictionary should be released once enumeration is done — confirm
		// against the interface definition.

		// enumerate the terms and check each against the regexp
		tfd, err := fieldDict.Next()
		for err == nil && tfd != nil {
			if pattern.MatchString(tfd.Term) {
				candidateTerms = append(candidateTerms, tfd.Term)
			}
			tfd, err = fieldDict.Next()
		}
		if err != nil {
			return nil, err
		}
	}

	// build one term searcher per candidate term, carrying the caller's boost
	qsearchers := make([]search.Searcher, 0, len(candidateTerms))
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// combine the term searchers into a single disjunction
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
// Count returns the count reported by the underlying disjunction searcher.
func (s *RegexpSearcher) Count() uint64 {
return s.searcher.Count()
}

// Weight returns the weight reported by the underlying disjunction searcher.
func (s *RegexpSearcher) Weight() float64 {
return s.searcher.Weight()
}

// SetQueryNorm forwards qnorm to the underlying disjunction searcher.
func (s *RegexpSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}

// Next returns whatever the underlying disjunction searcher's Next returns.
func (s *RegexpSearcher) Next() (*search.DocumentMatch, error) {
return s.searcher.Next()

}

// Advance forwards ID to the underlying disjunction searcher's Advance
// and returns its result. Calling Next here instead would ignore ID
// and simply return the next match.
func (s *RegexpSearcher) Advance(ID string) (*search.DocumentMatch, error) {
	return s.searcher.Advance(ID)
}

// Close closes the underlying disjunction searcher and returns its error.
func (s *RegexpSearcher) Close() error {
return s.searcher.Close()
}

// Min always returns 0.
func (s *RegexpSearcher) Min() int {
return 0
}
Loading

0 comments on commit a41f229

Please sign in to comment.