Skip to content

Commit

Permalink
finished initial impl of fuzzy search
Browse files Browse the repository at this point in the history
You can do a manual fuzzy term search using the FuzzyQuery struct
or, more suitably for most users, use the MatchQuery, which now supports
some fuzzy options.  There you can specify fuzziness and
prefix_length to turn the underlying term search into a fuzzy
term search.  This has the benefit that analysis is performed
on your input, just as it is for the analyzed field, prior to computing
the fuzzy variants.

closes blevesearch#82
  • Loading branch information
mschoch committed Oct 24, 2014
1 parent 78467c0 commit 3a0263b
Show file tree
Hide file tree
Showing 10 changed files with 745 additions and 334 deletions.
4 changes: 2 additions & 2 deletions query.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ func ParseQuery(input []byte) (Query, error) {
if err != nil {
return nil, err
}
_, isMatchQuery := tmp["match"]
_, hasFuzziness := tmp["fuzziness"]
if hasFuzziness {
if hasFuzziness && !isMatchQuery {
var rv fuzzyQuery
err := json.Unmarshal(input, &rv)
if err != nil {
Expand All @@ -59,7 +60,6 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, isMatchQuery := tmp["match"]
if isMatchQuery {
var rv matchQuery
err := json.Unmarshal(input, &rv)
Expand Down
38 changes: 28 additions & 10 deletions query_fuzzy.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@ import (
)

type fuzzyQuery struct {
Term string `json:"term"`
Prefix int `json:"prefix_length"`
Fuzziness int `json:"fuzziness"`
FieldVal string `json:"field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Term string `json:"term"`
PrefixVal int `json:"prefix_length"`
FuzzinessVal int `json:"fuzziness"`
FieldVal string `json:"field,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
}

// NewFuzzyQuery creates a new Query which finds
// documents containing terms within a specific
// fuzziness of the specified term. The default
// fuzziness is 1 and the default prefix length is 0.
func NewFuzzyQuery(term string) *fuzzyQuery {
return &fuzzyQuery{
Term: term,
Prefix: 0,
Fuzziness: 1,
BoostVal: 1.0,
Term: term,
PrefixVal: 0,
FuzzinessVal: 1,
BoostVal: 1.0,
}
}

Expand All @@ -53,12 +53,30 @@ func (q *fuzzyQuery) SetField(f string) Query {
return q
}

// Fuzziness returns the fuzziness value configured on this query
// (the "fuzziness" field in its JSON form).
func (q *fuzzyQuery) Fuzziness() int {
return q.FuzzinessVal
}

// SetFuzziness sets the fuzziness value used by this query and
// returns the query itself.
func (q *fuzzyQuery) SetFuzziness(f int) Query {
q.FuzzinessVal = f
return q
}

// Prefix returns the prefix length configured on this query
// (the "prefix_length" field in its JSON form).
func (q *fuzzyQuery) Prefix() int {
return q.PrefixVal
}

// SetPrefix sets the prefix length used by this query and
// returns the query itself.
func (q *fuzzyQuery) SetPrefix(p int) Query {
q.PrefixVal = p
return q
}

func (q *fuzzyQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultField
}
return searchers.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal, explain)
return searchers.NewFuzzySearcher(i, q.Term, q.PrefixVal, q.FuzzinessVal, field, q.BoostVal, explain)
}

func (q *fuzzyQuery) Validate() error {
Expand Down
49 changes: 41 additions & 8 deletions query_match.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,19 @@ package bleve

import (
"fmt"
"log"

"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)

type matchQuery struct {
Match string `json:"match"`
FieldVal string `json:"field,omitempty"`
Analyzer string `json:"analyzer,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
Match string `json:"match"`
FieldVal string `json:"field,omitempty"`
Analyzer string `json:"analyzer,omitempty"`
BoostVal float64 `json:"boost,omitempty"`
PrefixVal int `json:"prefix_length"`
FuzzinessVal int `json:"fuzziness"`
}

// NewMatchQuery creates a Query for matching text.
Expand Down Expand Up @@ -54,6 +57,24 @@ func (q *matchQuery) SetField(f string) Query {
return q
}

// Fuzziness returns the fuzziness value configured on this query.
// A non-zero value makes Searcher build a fuzzy query, rather than
// a plain term query, for each token of the match text.
func (q *matchQuery) Fuzziness() int {
return q.FuzzinessVal
}

// SetFuzziness sets the fuzziness value used by this query and
// returns the query itself. Leaving it at zero keeps the exact
// term matching behavior.
func (q *matchQuery) SetFuzziness(f int) Query {
q.FuzzinessVal = f
return q
}

// Prefix returns the prefix length configured on this query
// (the "prefix_length" field in its JSON form). It is only
// consulted by Searcher when the fuzziness is non-zero.
func (q *matchQuery) Prefix() int {
return q.PrefixVal
}

// SetPrefix sets the prefix length that is handed to the per-token
// fuzzy queries and returns the query itself.
func (q *matchQuery) SetPrefix(p int) Query {
q.PrefixVal = p
return q
}

func (q *matchQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {

field := q.FieldVal
Expand All @@ -77,10 +98,22 @@ func (q *matchQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool
if len(tokens) > 0 {

tqs := make([]Query, len(tokens))
for i, token := range tokens {
tqs[i] = NewTermQuery(string(token.Term)).
SetField(field).
SetBoost(q.BoostVal)
if q.FuzzinessVal != 0 {
log.Printf("fuzziness is %d", q.FuzzinessVal)
for i, token := range tokens {
query := NewFuzzyQuery(string(token.Term))
query.SetFuzziness(q.FuzzinessVal)
query.SetPrefix(q.PrefixVal)
query.SetField(field)
query.SetBoost(q.BoostVal)
tqs[i] = query
}
} else {
for i, token := range tokens {
tqs[i] = NewTermQuery(string(token.Term)).
SetField(field).
SetBoost(q.BoostVal)
}
}

shouldQuery := NewDisjunctionQueryMin(tqs, 1).
Expand Down
9 changes: 8 additions & 1 deletion query_string.nex
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,21 @@
/>/ { logDebugTokens("GREATER"); return tGREATER }
/</ { logDebugTokens("LESS"); return tLESS }
/=/ { logDebugTokens("EQUAL"); return tEQUAL }
/~([0-9]|[1-9][0-9]*)/
{
lval.s = yylex.Text()[1:]
logDebugTokens("TILDENUMBER - %s", lval.s);
return tTILDENUMBER
}
/~/ { logDebugTokens("TILDE"); return tTILDE }
/-?([0-9]|[1-9][0-9]*)(\.[0-9][0-9]*)?/
{
lval.s = yylex.Text()
logDebugTokens("NUMBER - %s", lval.s);
return tNUMBER
}
/[ \t\n]+/ { logDebugTokens("WHITESPACE (count=%d)", len(yylex.Text())) /* eat up whitespace */ }
/[^\t\n\f\r :^\+\-><=][^\t\n\f\r :^]*/ {
/[^\t\n\f\r :^\+\-><=~][^\t\n\f\r :^~]*/ {
lval.s = yylex.Text()
logDebugTokens("STRING - %s", lval.s);
return tSTRING
Expand Down
Loading

0 comments on commit 3a0263b

Please sign in to comment.