Skip to content

Commit

Permalink
Allow swapping NotReady and Unschedulable Taints
Browse files Browse the repository at this point in the history
  • Loading branch information
gmarek committed Apr 4, 2017
1 parent 46d4c62 commit 576ad81
Show file tree
Hide file tree
Showing 5 changed files with 377 additions and 109 deletions.
38 changes: 34 additions & 4 deletions pkg/controller/node/controller_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ import (
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
extensionslisters "k8s.io/kubernetes/pkg/client/listers/extensions/v1beta1"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/util/node"
nodepkg "k8s.io/kubernetes/pkg/util/node"
utilversion "k8s.io/kubernetes/pkg/util/version"

"github.com/golang/glog"
Expand Down Expand Up @@ -102,12 +103,12 @@ func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, n
// setPodTerminationReason attempts to set a reason and message in the pod status, updates it in the apiserver,
// and returns an error if it encounters one.
func setPodTerminationReason(kubeClient clientset.Interface, pod *v1.Pod, nodeName string) (*v1.Pod, error) {
if pod.Status.Reason == node.NodeUnreachablePodReason {
if pod.Status.Reason == nodepkg.NodeUnreachablePodReason {
return pod, nil
}

pod.Status.Reason = node.NodeUnreachablePodReason
pod.Status.Message = fmt.Sprintf(node.NodeUnreachablePodMessage, nodeName, pod.Name)
pod.Status.Reason = nodepkg.NodeUnreachablePodReason
pod.Status.Message = fmt.Sprintf(nodepkg.NodeUnreachablePodMessage, nodeName, pod.Name)

var updatedPod *v1.Pod
var err error
Expand Down Expand Up @@ -286,3 +287,32 @@ func recordNodeStatusChange(recorder record.EventRecorder, node *v1.Node, new_st
// and event is recorded or neither should happen, see issue #6055.
recorder.Eventf(ref, v1.EventTypeNormal, new_status, "Node %s status is now: %s", node.Name, new_status)
}

// swapNodeControllerTaint puts taintToAdd on the given Node and then takes
// taintToRemove off it, so that the Node ends up carrying only the former.
//
// taintToAdd.TimeAdded is overwritten with the current time before the taint is
// applied, so the caller's Taint value is modified. The removal is attempted
// only after the addition succeeded; if the removal fails the freshly added
// taint is left in place (there is no rollback).
//
// Returns true when both steps succeed. On the first failure the error is
// reported through utilruntime.HandleError and false is returned.
func swapNodeControllerTaint(kubeClient clientset.Interface, taintToAdd, taintToRemove *v1.Taint, node *v1.Node) bool {
	nodeName := node.Name

	taintToAdd.TimeAdded = metav1.Now()
	if addErr := controller.AddOrUpdateTaintOnNode(kubeClient, nodeName, taintToAdd); addErr != nil {
		failure := fmt.Errorf("unable to taint %v unresponsive Node %q: %v", taintToAdd.Key, nodeName, addErr)
		utilruntime.HandleError(failure)
		return false
	}
	glog.V(4).Infof("Added %v Taint to Node %v", taintToAdd, nodeName)

	if removeErr := controller.RemoveTaintOffNode(kubeClient, nodeName, taintToRemove, node); removeErr != nil {
		failure := fmt.Errorf("unable to remove %v unneeded taint from unresponsive Node %q: %v", taintToRemove.Key, nodeName, removeErr)
		utilruntime.HandleError(failure)
		return false
	}
	glog.V(4).Infof("Made sure that Node %v has no %v Taint", nodeName, taintToRemove)

	return true
}
177 changes: 84 additions & 93 deletions pkg/controller/node/nodecontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,74 @@ func NewNodeController(
return nc, nil
}

// doEvictionPass runs one round of pod eviction: while holding evictorLock it
// offers every queue in zonePodEvictor the chance to process its due entries,
// deleting the pods of each queued Node via deletePods.
func (nc *NodeController) doEvictionPass() {
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()

	// evictNode is the callback handed to Try. It should return 'false' and a
	// time after which the entry should be retried, or 'true' if it shouldn't
	// (it succeeded).
	evictNode := func(value TimedValue) (bool, time.Duration) {
		// The lister lookup only feeds the per-zone eviction metric. A failed
		// lookup is logged and pod deletion is still attempted below.
		node, lookupErr := nc.nodeLister.Get(value.Value)
		switch {
		case apierrors.IsNotFound(lookupErr):
			glog.Warningf("Node %v no longer present in nodeLister!", value.Value)
		case lookupErr != nil:
			glog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, lookupErr)
		default:
			EvictionsNumber.WithLabelValues(utilnode.GetZoneKey(node)).Inc()
		}

		// A UID that is not a string is deliberately passed on as "".
		uid, _ := value.UID.(string)
		podsRemaining, deleteErr := deletePods(nc.kubeClient, nc.recorder, value.Value, uid, nc.daemonSetStore)
		if deleteErr != nil {
			utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, deleteErr))
			return false, 0
		}
		if podsRemaining {
			glog.Infof("Pods awaiting deletion due to NodeController eviction")
		}
		return true, 0
	}

	for zoneKey := range nc.zonePodEvictor {
		nc.zonePodEvictor[zoneKey].Try(evictNode)
	}
}

// doTaintingPass runs one round of taint-based handling: while holding
// evictorLock it offers every queue in zoneNotReadyOrUnreachableTainer the
// chance to process its due entries. For each queued Node it looks at the
// Node's Ready condition and applies the matching taint (NotReady for
// ConditionFalse, Unreachable for ConditionUnknown), removing the opposite one.
func (nc *NodeController) doTaintingPass() {
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()
	for k := range nc.zoneNotReadyOrUnreachableTainer {
		// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
		nc.zoneNotReadyOrUnreachableTainer[k].Try(func(value TimedValue) (bool, time.Duration) {
			node, err := nc.nodeLister.Get(value.Value)
			if apierrors.IsNotFound(err) {
				// The Node is gone, so there is nothing left to taint; drop the entry.
				glog.Warningf("Node %v no longer present in nodeLister!", value.Value)
				return true, 0
			}
			if err != nil {
				glog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
				// retry in 50 millisecond
				return false, 50 * time.Millisecond
			}

			_, condition := v1.GetNodeCondition(&node.Status, v1.NodeReady)
			if condition == nil {
				// GetNodeCondition yields nil when the Node carries no Ready
				// condition; dereferencing it below would panic. Retry shortly
				// instead.
				// NOTE(review): assumes a Ready condition will eventually show up
				// in the lister for a queued Node - confirm against monitorNodeStatus.
				glog.Warningf("Node %v has no Ready condition in the nodeLister, will retry tainting", value.Value)
				return false, 50 * time.Millisecond
			}

			// The metric is bumped for every entry whose Ready condition could be
			// read, including Nodes that turn out to be ready again below.
			zone := utilnode.GetZoneKey(node)
			EvictionsNumber.WithLabelValues(zone).Inc()

			// Because we want to mimic NodeStatus.Condition["Ready"] we make "unreachable" and "not ready" taints mutually exclusive.
			// The templates are copied so that swapNodeControllerTaint stamps the
			// copy rather than the shared template.
			var taintToAdd, oppositeTaint v1.Taint
			switch condition.Status {
			case v1.ConditionFalse:
				taintToAdd = *NotReadyTaintTemplate
				oppositeTaint = *UnreachableTaintTemplate
			case v1.ConditionUnknown:
				taintToAdd = *UnreachableTaintTemplate
				oppositeTaint = *NotReadyTaintTemplate
			default:
				// It seems that the Node is ready again, so there's no need to taint it.
				glog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
				return true, 0
			}

			return swapNodeControllerTaint(nc.kubeClient, &taintToAdd, &oppositeTaint, node), 0
		})
	}
}

// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run() {
go func() {
Expand All @@ -502,101 +570,12 @@ func (nc *NodeController) Run() {
if nc.useTaintBasedEvictions {
// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
go wait.Until(func() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zoneNotReadyOrUnreachableTainer {
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
nc.zoneNotReadyOrUnreachableTainer[k].Try(func(value TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) {
glog.Warningf("Node %v no longer present in nodeLister!", value.Value)
return true, 0
} else if err != nil {
glog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
// retry in 50 millisecond
return false, 50 * time.Millisecond
} else {
zone := utilnode.GetZoneKey(node)
EvictionsNumber.WithLabelValues(zone).Inc()
}
_, condition := v1.GetNodeCondition(&node.Status, v1.NodeReady)
// Because we want to mimic NodeStatus.Condition["Ready"] we make "unreachable" and "not ready" taints mutually exclusive.
taintToAdd := v1.Taint{}
oppositeTaint := v1.Taint{}
if condition.Status == v1.ConditionFalse {
taintToAdd = *NotReadyTaintTemplate
oppositeTaint = *UnreachableTaintTemplate
} else if condition.Status == v1.ConditionUnknown {
taintToAdd = *UnreachableTaintTemplate
oppositeTaint = *NotReadyTaintTemplate
} else {
// It seems that the Node is ready again, so there's no need to taint it.
glog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
return true, 0
}

taintToAdd.TimeAdded = metav1.Now()
err = controller.AddOrUpdateTaintOnNode(nc.kubeClient, value.Value, &taintToAdd)
if err != nil {
utilruntime.HandleError(
fmt.Errorf(
"unable to taint %v unresponsive Node %q: %v",
taintToAdd.Key,
value.Value,
err))
return false, 0
} else {
glog.V(4).Info("Added %v Taint to Node %v", taintToAdd, value.Value)
}
err = controller.RemoveTaintOffNode(nc.kubeClient, value.Value, &oppositeTaint, node)
if err != nil {
utilruntime.HandleError(
fmt.Errorf(
"unable to remove %v unneeded taint from unresponsive Node %q: %v",
oppositeTaint.Key,
value.Value,
err))
return false, 0
} else {
glog.V(4).Info("Made sure that Node %v has no %v Taint", value.Value, oppositeTaint)
}
return true, 0
})
}
}, nodeEvictionPeriod, wait.NeverStop)
go wait.Until(nc.doTaintingPass, nodeEvictionPeriod, wait.NeverStop)
} else {
// Managing eviction of nodes:
// When we delete pods off a node, if the node was not empty at the time we then
// queue an eviction watcher. If we hit an error, retry deletion.
go wait.Until(func() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zonePodEvictor {
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
nc.zonePodEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) {
glog.Warningf("Node %v no longer present in nodeLister!", value.Value)
} else if err != nil {
glog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
} else {
zone := utilnode.GetZoneKey(node)
EvictionsNumber.WithLabelValues(zone).Inc()
}
nodeUid, _ := value.UID.(string)
remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nodeUid, nc.daemonSetStore)
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
return false, 0
}
if remaining {
glog.Infof("Pods awaiting deletion due to NodeController eviction")
}
return true, 0
})
}
}, nodeEvictionPeriod, wait.NeverStop)
go wait.Until(nc.doEvictionPass, nodeEvictionPeriod, wait.NeverStop)
}
}()
}
Expand Down Expand Up @@ -685,7 +664,13 @@ func (nc *NodeController) monitorNodeStatus() error {
// Check eviction timeout against decisionTimestamp
if observedReadyCondition.Status == v1.ConditionFalse {
if nc.useTaintBasedEvictions {
if nc.markNodeForTainting(node) {
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if v1.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
taintToAdd := *NotReadyTaintTemplate
if !swapNodeControllerTaint(nc.kubeClient, &taintToAdd, UnreachableTaintTemplate, node) {
glog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node) {
glog.V(2).Infof("Node %v is NotReady as of %v. Adding it to the Taint queue.",
node.Name,
decisionTimestamp,
Expand All @@ -706,7 +691,13 @@ func (nc *NodeController) monitorNodeStatus() error {
}
if observedReadyCondition.Status == v1.ConditionUnknown {
if nc.useTaintBasedEvictions {
if nc.markNodeForTainting(node) {
// We want to update the taint straight away if Node is already tainted with the NotReadyTaint
if v1.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate
if !swapNodeControllerTaint(nc.kubeClient, &taintToAdd, NotReadyTaintTemplate, node) {
glog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node) {
glog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.",
node.Name,
decisionTimestamp,
Expand Down
Loading

0 comments on commit 576ad81

Please sign in to comment.