Skip to content

Commit

Permalink
Merge pull request #29354 from xueqzhan/operator-degraded-4
Browse files (browse the repository at this point in the history)
TRT-1575: Use catchall card for operator degraded exceptions for major operators
  • Loading branch information
openshift-merge-bot[bot] authored Dec 19, 2024
2 parents aa11e85 + 200ae23 commit 17f91e0
Showing 1 changed file with 35 additions and 61 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,25 +102,42 @@ func testStableSystemOperatorStateTransitions(events monitorapi.Intervals, clien
if operator == "cloud-controller-manager" && condition.Reason == "SyncingFailed" {
return "https://issues.redhat.com/browse/OCPBUGS-42837", nil
}
if operator == "cloud-credential" {
return "https://issues.redhat.com/browse/OCPBUGS-42872", nil
}
if operator == "dns" && condition.Reason == "DNSDegraded" {
return "https://issues.redhat.com/browse/OCPBUGS-38750", nil
}
if operator == "etcd" && (condition.Reason == "EtcdMembers_UnhealthyMembers" || condition.Reason == "NodeInstaller_InstallerPodFailed") {
if operator == "etcd" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if operator == "network" && (condition.Reason == "ApplyOperatorConfig" || condition.Reason == "RenderError") {
if operator == "ingress" && condition.Reason == "IngressDegraded" {
return "https://issues.redhat.com/browse/OCPBUGS-45921", nil
}
if operator == "kube-apiserver" {
return "https://issues.redhat.com/browse/OCPBUGS-38661", nil
}
if operator == "kube-controller-manager" {
return "https://issues.redhat.com/browse/OCPBUGS-38662", nil
}
if operator == "kube-scheduler" {
return "https://issues.redhat.com/browse/OCPBUGS-38663", nil
}
if operator == "network" {
return "https://issues.redhat.com/browse/OCPBUGS-38684", nil
}
if operator == "machine-config" &&
(condition.Reason == "MachineConfigDaemonFailed" ||
condition.Reason == "MachineConfigurationFailed" ||
condition.Reason == "MachineConfigNodeFailed" ||
condition.Reason == "MachineConfigControllerFailed") {
return "https://issues.redhat.com/browse/OCPBUGS-38749", nil
if operator == "machine-config" {
return "https://issues.redhat.com/browse/MCO-1447", nil
}
if operator == "authentication" && (condition.Reason == "OAuthServerDeployment_UnavailablePod" || condition.Reason == "WellKnownReadyController_SyncError") {
if operator == "authentication" {
return "https://issues.redhat.com/browse/OCPBUGS-38675", nil
}
if operator == "console" {
return "https://issues.redhat.com/browse/OCPBUGS-38676", nil
}
if operator == "cluster-autoscaler" {
return "https://issues.redhat.com/browse/OCPBUGS-42875", nil
}
// flake this to collect more exceptions
return "https://issues.redhat.com/browse/TRT-1575", nil
}
Expand Down Expand Up @@ -258,7 +275,7 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
return "", nil
}
case "monitoring":
if condition.Type == configv1.OperatorDegraded && (condition.Status == configv1.ConditionTrue && condition.Reason == "UpdatingPrometheusFailed") {
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/OCPBUGS-39026", nil
}
case "network":
Expand All @@ -284,7 +301,7 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf

switch operator {
case "authentication":
if isSingleNode && condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/OCPBUGS-38675", nil
} else if checkAuthenticationAvailableExceptions(condition) {
return "https://issues.redhat.com/browse/OCPBUGS-20056", nil
Expand All @@ -304,7 +321,7 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
return "https://issues.redhat.com/browse/OCPBUGS-42872", nil
}
case "console":
if isSingleNode && condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/OCPBUGS-38676", nil
} else if condition.Type == configv1.OperatorAvailable && condition.Status == configv1.ConditionFalse &&
(condition.Reason == "RouteHealth_FailedGet" ||
Expand Down Expand Up @@ -335,12 +352,8 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
if condition.Type == configv1.OperatorAvailable && condition.Status == configv1.ConditionFalse && strings.Contains(condition.Message, "missing HTTP content-type") {
return "https://issues.redhat.com/browse/OCPBUGS-24228", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue &&
(condition.Reason == "MachineConfigDaemonFailed" ||
condition.Reason == "RenderConfigFailed" ||
condition.Reason == "MachineConfigPoolsFailed" ||
condition.Reason == "MachineConfigControllerFailed") {
return "https://issues.redhat.com/browse/OCPBUGS-39199", nil
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/MCO-1447", nil
}
case "monitoring":
if condition.Type == configv1.OperatorAvailable &&
Expand All @@ -353,8 +366,7 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
(condition.Status == configv1.ConditionUnknown && condition.Reason == "UpdatingPrometheusFailed") {
return "https://issues.redhat.com/browse/OCPBUGS-23745", nil
}
if condition.Type == configv1.OperatorDegraded &&
(condition.Status == configv1.ConditionTrue && (condition.Reason == "UpdatingPrometheusFailed" || condition.Reason == "UpdatingPrometheusOperatorFailed")) {
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/OCPBUGS-39026", nil
}
case "openshift-apiserver":
Expand Down Expand Up @@ -388,45 +400,11 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
return "https://issues.redhat.com/browse/OCPBUGS-38666", nil
}
case "etcd":
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdCertSignerController_Error::EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembers_UnhealthyMembers::NodeController_MasterNodesReady::TargetConfigController_SynchronizationError" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdCertSignerController_Error::EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembers_UnhealthyMembers::TargetConfigController_SynchronizationError" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdCertSignerController_Error::EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembersController_ErrorUpdatingReportEtcdMembers::EtcdMembers_UnhealthyMembers::TargetConfigController_SynchronizationError" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembers_UnhealthyMembers::TargetConfigController_SynchronizationError" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "NodeController_MasterNodesReady::StaticPods_Error" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdMembers_UnhealthyMembers::NodeController_MasterNodesReady::StaticPods_Error" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdCertSignerController_Error::EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembers_UnhealthyMembers::NodeController_MasterNodesReady::StaticPods_Error::TargetConfigController_SynchronizationError" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdCertSignerController_Error::EtcdEndpoints_ErrorUpdatingEtcdEndpoints::TargetConfigController_SynchronizationError" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "Unknown" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "EtcdMembers_UnhealthyMembers" {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "ClusterMemberController_SyncError::EtcdMembersController_ErrorUpdatingReportEtcdMembers::EtcdMembers_UnhealthyMembers" {
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/OCPBUGS-38659", nil
}
case "network":
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "ApplyOperatorConfig" {
return "https://issues.redhat.com/browse/OCPBUGS-38668", nil
}
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue &&
(condition.Reason == "NoOperConfig" || condition.Reason == "BootstrapError" || condition.Reason == "RenderError" || condition.Reason == "RolloutHung") {
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
return "https://issues.redhat.com/browse/OCPBUGS-38668", nil
}
case "openshift-samples":
Expand All @@ -438,11 +416,7 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
if isSingleNode && condition.Reason == "NodeInstaller_InstallerPodFailed" {
return "https://issues.redhat.com/browse/OCPBUGS-38678", nil
}
if condition.Reason == "NodeController_MasterNodesReady::StaticPods_Error" ||
condition.Reason == "NodeController_MasterNodesReady" ||
condition.Reason == "KubeAPIServerStaticResources_SyncError" {
return "https://issues.redhat.com/browse/OCPBUGS-38661", nil
}
return "https://issues.redhat.com/browse/OCPBUGS-38661", nil
}
case "kube-controller-manager":
if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue && condition.Reason == "NodeController_MasterNodesReady" {
Expand Down

0 comments on commit 17f91e0

Please sign in to comment.