Skip to content

Commit

Permalink
Merge pull request prometheus#1373 from prometheus/fix-flapping-alert-detection
Browse files Browse the repository at this point in the history

Fix detection of flapping alerts
  • Loading branch information
fabxc committed Feb 5, 2016
2 parents 2b9de9e + f1f8317 commit facabe2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 14 deletions.
4 changes: 2 additions & 2 deletions rules/alerting.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const (
type AlertState int

const (
// StateInactive is the state of an alert that is either firing nor pending.
// StateInactive is the state of an alert that is neither firing nor pending.
StateInactive AlertState = iota
// StatePending is the state of an alert that has been active for less than
// the configured threshold duration.
Expand Down Expand Up @@ -159,7 +159,7 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
fp := smpl.Metric.Fingerprint()
resultFPs[fp] = struct{}{}

if alert, ok := r.active[fp]; ok {
if alert, ok := r.active[fp]; ok && alert.State != StateInactive {
alert.Value = smpl.Value
continue
}
Expand Down
33 changes: 21 additions & 12 deletions rules/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,8 @@ import (
func TestAlertingRule(t *testing.T) {
suite, err := promql.NewTest(t, `
load 5m
http_requests{job="api-server", instance="0", group="production"} 0+10x10
http_requests{job="api-server", instance="1", group="production"} 0+20x10
http_requests{job="api-server", instance="0", group="canary"} 0+30x10
http_requests{job="api-server", instance="1", group="canary"} 0+40x10
http_requests{job="app-server", instance="0", group="production"} 0+50x10
http_requests{job="app-server", instance="1", group="production"} 0+60x10
http_requests{job="app-server", instance="0", group="canary"} 0+70x10
http_requests{job="app-server", instance="1", group="canary"} 0+80x10
http_requests{job="app-server", instance="0", group="canary"} 75 85 95 105 105 95 85
http_requests{job="app-server", instance="1", group="canary"} 80 90 100 110 120 130 140
`)
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -79,17 +73,32 @@ func TestAlertingRule(t *testing.T) {
}, {
time: 10 * time.Minute,
result: []string{
`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
},
},
{
time: 15 * time.Minute,
result: nil,
time: 15 * time.Minute,
result: []string{
`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
},
},
{
time: 20 * time.Minute,
result: nil,
result: []string{},
},
{
time: 25 * time.Minute,
result: []string{
`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
},
},
{
time: 30 * time.Minute,
result: []string{
`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
},
},
}

Expand Down

0 comments on commit facabe2

Please sign in to comment.