Skip to content

Commit

Permalink
Optimise labels regex matchers containing a literal within the pattern (
Browse files Browse the repository at this point in the history
prometheus#7503)

* Added labels matchers regex fast path for literals within the regex

Signed-off-by: Marco Pracucci <marco@pracucci.com>
  • Loading branch information
pracucci authored Jul 7, 2020
1 parent 9875afc commit 2f6bf7d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 15 deletions.
24 changes: 19 additions & 5 deletions pkg/labels/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ import (
)

type FastRegexMatcher struct {
re *regexp.Regexp
prefix string
suffix string
re *regexp.Regexp
prefix string
suffix string
contains string
}

func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
Expand All @@ -41,7 +42,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
}

if parsed.Op == syntax.OpConcat {
m.prefix, m.suffix = optimizeConcatRegex(parsed)
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
}

return m, nil
Expand All @@ -54,6 +55,9 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
return false
}
if m.contains != "" && !strings.Contains(s, m.contains) {
return false
}
return m.re.MatchString(s)
}

Expand All @@ -63,7 +67,7 @@ func (m *FastRegexMatcher) GetRegexString() string {

// optimizeConcatRegex returns literal prefix/suffix text that can be safely
// checked against the label value before running the regexp matcher.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
sub := r.Sub

// We can safely remove begin and end text matchers respectively
Expand All @@ -89,5 +93,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
suffix = string(sub[last].Rune)
}

// If contains any literal which is not a prefix/suffix, we keep the
// 1st one. We do not keep the whole list of literals to simplify the
// fast path.
for i := 1; i < len(sub)-1; i++ {
if sub[i].Op == syntax.OpLiteral {
contains = string(sub[i].Rune)
break
}
}

return
}
31 changes: 21 additions & 10 deletions pkg/labels/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ func TestNewFastRegexMatcher(t *testing.T) {
{regex: ".*foo", value: "\nfoo", expected: false},
{regex: "foo.*", value: "foo\n", expected: false},
{regex: "foo\n.*", value: "foo\n", expected: true},
{regex: ".*foo.*", value: "foo", expected: true},
{regex: ".*foo.*", value: "foo bar", expected: true},
{regex: ".*foo.*", value: "hello foo world", expected: true},
{regex: ".*foo.*", value: "hello foo\n world", expected: false},
{regex: ".*foo\n.*", value: "hello foo\n world", expected: true},
{regex: ".*", value: "foo", expected: true},
{regex: "", value: "foo", expected: false},
{regex: "", value: "", expected: true},
Expand All @@ -56,24 +61,30 @@ func TestNewFastRegexMatcher(t *testing.T) {

func TestOptimizeConcatRegex(t *testing.T) {
cases := []struct {
regex string
prefix string
suffix string
regex string
prefix string
suffix string
contains string
}{
{regex: "foo(hello|bar)", prefix: "foo", suffix: ""},
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"},
{regex: "foo.*", prefix: "foo", suffix: ""},
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"},
{regex: ".*foo", prefix: "", suffix: "foo"},
{regex: "^.*foo$", prefix: "", suffix: "foo"},
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
}

for _, c := range cases {
parsed, err := syntax.Parse(c.regex, syntax.Perl)
testutil.Ok(t, err)

prefix, suffix := optimizeConcatRegex(parsed)
prefix, suffix, contains := optimizeConcatRegex(parsed)
testutil.Equals(t, c.prefix, prefix)
testutil.Equals(t, c.suffix, suffix)
testutil.Equals(t, c.contains, contains)
}
}
4 changes: 4 additions & 0 deletions tsdb/querier_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,14 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$")
iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$")
iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$")
iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "")
iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix)
iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$")

cases := []struct {
name string
Expand All @@ -120,8 +122,10 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
{`n="1",i!="",j="foo"`, []*labels.Matcher{n1, iNotEmpty, jFoo}},
{`n="1",i=~".+",j="foo"`, []*labels.Matcher{n1, iPlus, jFoo}},
{`n="1",i=~"1.+",j="foo"`, []*labels.Matcher{n1, i1Plus, jFoo}},
{`n="1",i=~".*1.*",j="foo"`, []*labels.Matcher{n1, iStar1Star, jFoo}},
{`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}},
{`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}},
{`n="1",i=~".+",i!~".*2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNotStar2Star, jFoo}},
}

for _, c := range cases {
Expand Down

0 comments on commit 2f6bf7d

Please sign in to comment.