Skip to content

Commit

Permalink
alerting v2beta1 tunes (kubesphere#5200)
Browse files Browse the repository at this point in the history
Signed-off-by: junot <junotxiang@kubesphere.io>

Signed-off-by: junot <junotxiang@kubesphere.io>
  • Loading branch information
junotx authored and sologgfun committed Apr 24, 2023
1 parent 9e62dfa commit 43e1576
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 15 deletions.
9 changes: 9 additions & 0 deletions pkg/api/alerting/v2beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ type RuleGroupStatus struct {
EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on rule group evaluation in seconds"`
LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"`
RulesStatus []RuleStatus `json:"rulesStatus,omitempty" description:"status of rules in one RuleGroup"`
RulesStats RulesStats `json:"rulesStats,omitempty" description:"statistics of rules in one RuleGroup"`
}

// RulesStats holds per-category counts of the rules in one RuleGroup.
// It is exposed through the rulesStats field of RuleGroupStatus.
// None of the fields use omitempty, so zero counts are still serialized.
type RulesStats struct {
// Inactive is the count of rules in the inactive state.
Inactive int `json:"inactive" description:"count of rules in the inactive state"`
// Pending is the count of rules in the pending state.
Pending int `json:"pending" description:"count of rules in the pending state"`
// Firing is the count of rules in the firing state.
Firing int `json:"firing" description:"count of rules in the firing state"`
// Disabled is the count of disabled rules.
Disabled int `json:"disabled" description:"count of disabled rules"`
}

type RuleStatus struct {
Expand All @@ -77,6 +85,7 @@ type RuleStatus struct {
LastError string `json:"lastError,omitempty" description:"error of the last evaluation"`
EvaluationTime *float64 `json:"evaluationTime,omitempty" description:"time spent on the expression evaluation in seconds"`
LastEvaluation *time.Time `json:"lastEvaluation,omitempty" description:"time of last evaluation"`
ActiveAt *time.Time `json:"activeAt,omitempty" description:"time when this rule became active"`

Alerts []*Alert `json:"alerts,omitempty" description:"alerts"`
}
Expand Down
31 changes: 22 additions & 9 deletions pkg/controller/alerting/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ const (
RuleLevelCluster RuleLevel = "cluster"
RuleLevelGlobal RuleLevel = "global"

// label keys in rule.labels
RuleLabelKeyRuleLevel = "rule_level"
RuleLabelKeyCluster = "cluster"
RuleLabelKeyNamespace = "namespace"
RuleLabelKeySeverity = "severity"
// for rule.labels
RuleLabelKeyRuleLevel = "rule_level"
RuleLabelKeyRuleGroup = "rule_group"
RuleLabelKeyCluster = "cluster"
RuleLabelKeyNamespace = "namespace"
RuleLabelKeySeverity = "severity"
RuleLabelKeyAlertType = "alerttype"
RuleLabelValueAlertTypeMetric = "metric"

// label keys in RuleGroup/ClusterRuleGroup/GlobalRuleGroup.metadata.labels
SourceGroupResourceLabelKeyEnable = "alerting.kubesphere.io/enable"
Expand Down Expand Up @@ -132,7 +135,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
commonEnforceFuncs ...enforceRuleFunc) ([]*promresourcesv1.RuleGroup, error) {
var rulegroups []*promresourcesv1.RuleGroup

convertRule := func(rule *alertingv2beta1.Rule, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) {
convertRule := func(rule *alertingv2beta1.Rule, groupName string, enforceFuncs ...enforceRuleFunc) (*promresourcesv1.Rule, error) {
if rule.Disable { // ignoring disabled rule
return nil, nil
}
Expand All @@ -156,6 +159,15 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
}

enforceFuncs = append(enforceFuncs, commonEnforceFuncs...)
// enforce rule group label and alert type label
enforceFuncs = append(enforceFuncs, func(rule *promresourcesv1.Rule) error {
if rule.Labels == nil {
rule.Labels = make(map[string]string)
}
rule.Labels[RuleLabelKeyRuleGroup] = groupName
rule.Labels[RuleLabelKeyAlertType] = RuleLabelValueAlertTypeMetric
return nil
})

for _, f := range enforceFuncs {
if f == nil {
Expand All @@ -175,7 +187,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
for _, group := range list.Items {
var prules []promresourcesv1.Rule
for _, rule := range group.Spec.Rules {
prule, err := convertRule(&rule.Rule)
prule, err := convertRule(&rule.Rule, group.Name)
if err != nil {
log.WithValues("rulegroup", group.Namespace+"/"+group.Name).Error(err, "failed to convert")
continue
Expand All @@ -195,7 +207,7 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
for _, group := range list.Items {
var prules []promresourcesv1.Rule
for _, rule := range group.Spec.Rules {
prule, err := convertRule(&rule.Rule)
prule, err := convertRule(&rule.Rule, group.Name)
if err != nil {
log.WithValues("clusterrulegroup", group.Name).Error(err, "failed to convert")
continue
Expand All @@ -216,7 +228,8 @@ func makePrometheusRuleGroups(log logr.Logger, groupList client.ObjectList,
var prules []promresourcesv1.Rule
for _, rule := range group.Spec.Rules {

prule, err := convertRule(&rule.Rule, createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...)
prule, err := convertRule(&rule.Rule, group.Name,
createEnforceRuleFuncs(ParseGlobalRuleEnforceMatchers(&rule), nil)...)
if err != nil {
log.WithValues("globalrulegroup", group.Name).Error(err, "failed to convert")
continue
Expand Down
75 changes: 69 additions & 6 deletions pkg/models/alerting/rulegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package alerting

import (
"context"
"time"

promlabels "github.com/prometheus/prometheus/pkg/labels"
promrules "github.com/prometheus/prometheus/rules"
Expand Down Expand Up @@ -129,7 +130,7 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string
return nil, err
}

return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
hit, great := o.compareRuleGroupStatus(
&(left.(*kapialertingv2beta1.RuleGroup).Status), &(right.(*kapialertingv2beta1.RuleGroup).Status), field)
if hit {
Expand All @@ -143,7 +144,32 @@ func (o *ruleGroupOperator) ListRuleGroups(ctx context.Context, namespace string
return selected
}
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.RuleGroup).ObjectMeta, filter)
}), nil
})

for i := range listResult.Items {
item := listResult.Items[i].(*kapialertingv2beta1.RuleGroup)
for j, ruleStatus := range item.Status.RulesStatus {
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
}
listResult.Items[i] = item
}

return listResult, nil
}

// updateRulesStats bumps at most one counter in rulesStats for a single rule.
//
// A disabled rule is always counted under Disabled, regardless of ruleState.
// Otherwise ruleState selects the Inactive, Pending or Firing counter; a state
// that matches none of the known state strings leaves rulesStats untouched.
func updateRulesStats(rulesStats *kapialertingv2beta1.RulesStats, ruleDisable bool, ruleState string) {
	switch {
	case ruleDisable:
		// Disabled takes precedence over whatever state was reported.
		rulesStats.Disabled += 1
	case ruleState == stateInactiveString:
		rulesStats.Inactive += 1
	case ruleState == statePendingString:
		rulesStats.Pending += 1
	case ruleState == stateFiringString:
		rulesStats.Firing += 1
	}
}

// compareRuleGroupStatus compare rulegroup status.
Expand Down Expand Up @@ -299,6 +325,10 @@ func (o *ruleGroupOperator) GetRuleGroup(ctx context.Context, namespace, name st
}
}

for j, ruleStatus := range ret.Status.RulesStatus {
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
}

return ret, nil
}

Expand Down Expand Up @@ -366,7 +396,7 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context,
return nil, err
}

return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
hit, great := o.compareRuleGroupStatus(
&(left.(*kapialertingv2beta1.ClusterRuleGroup).Status), &(right.(*kapialertingv2beta1.ClusterRuleGroup).Status), field)
if hit {
Expand All @@ -380,7 +410,17 @@ func (o *ruleGroupOperator) ListClusterRuleGroups(ctx context.Context,
return selected
}
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.ClusterRuleGroup).ObjectMeta, filter)
}), nil
})

for i := range listResult.Items {
item := listResult.Items[i].(*kapialertingv2beta1.ClusterRuleGroup)
for j, ruleStatus := range item.Status.RulesStatus {
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
}
listResult.Items[i] = item
}

return listResult, nil
}

func (o *ruleGroupOperator) ListClusterAlerts(ctx context.Context,
Expand Down Expand Up @@ -456,6 +496,10 @@ func (o *ruleGroupOperator) GetClusterRuleGroup(ctx context.Context, name string
}
}

for j, ruleStatus := range ret.Status.RulesStatus {
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
}

return ret, nil
}

Expand Down Expand Up @@ -546,7 +590,7 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context,
return nil, err
}

return resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
listResult := resources.DefaultList(groups, queryParam, func(left, right runtime.Object, field query.Field) bool {
hit, great := o.compareRuleGroupStatus(
&(left.(*kapialertingv2beta1.GlobalRuleGroup).Status), &(right.(*kapialertingv2beta1.GlobalRuleGroup).Status), field)
if hit {
Expand All @@ -563,7 +607,17 @@ func (o *ruleGroupOperator) ListGlobalRuleGroups(ctx context.Context,
return selected
}
return resources.DefaultObjectMetaFilter(obj.(*kapialertingv2beta1.GlobalRuleGroup).ObjectMeta, filter)
}), nil
})

for i := range listResult.Items {
item := listResult.Items[i].(*kapialertingv2beta1.GlobalRuleGroup)
for j, ruleStatus := range item.Status.RulesStatus {
updateRulesStats(&item.Status.RulesStats, item.Spec.Rules[j].Disable, ruleStatus.State)
}
listResult.Items[i] = item
}

return listResult, nil
}

func (o *ruleGroupOperator) ListGlobalAlerts(ctx context.Context,
Expand Down Expand Up @@ -661,6 +715,10 @@ func (o *ruleGroupOperator) GetGlobalRuleGroup(ctx context.Context, name string)
}
}

for j, ruleStatus := range ret.Status.RulesStatus {
updateRulesStats(&ret.Status.RulesStats, ret.Spec.Rules[j].Disable, ruleStatus.State)
}

return ret, nil
}

Expand All @@ -677,6 +735,7 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
if ruleState := parseAlertState(rule.State); ruleState > groupState {
groupState = ruleState
}
var ruleActiveAt *time.Time
alerts := []*kapialertingv2beta1.Alert{}
for _, alert := range rule.Alerts {
alerts = append(alerts, &kapialertingv2beta1.Alert{
Expand All @@ -686,13 +745,17 @@ func copyRuleGroupStatus(source *alerting.RuleGroup, target *kapialertingv2beta1
State: alert.State,
Value: alert.Value,
})
if alert.ActiveAt != nil && (ruleActiveAt == nil || alert.ActiveAt.Before(*ruleActiveAt)) {
ruleActiveAt = alert.ActiveAt
}
}
ruleStatus := kapialertingv2beta1.RuleStatus{
State: rule.State,
Health: rule.Health,
LastError: rule.LastError,
EvaluationTime: rule.EvaluationTime,
LastEvaluation: rule.LastEvaluation,
ActiveAt: ruleActiveAt,
Alerts: alerts,
}
if len(rule.Labels) > 0 {
Expand Down

0 comments on commit 43e1576

Please sign in to comment.