From 24d658214d321842cbaf220accb1e6243be6b1ac Mon Sep 17 00:00:00 2001 From: Vincent Marguerie <24724195+vincentmrg@users.noreply.github.com> Date: Fri, 16 Feb 2024 11:04:36 +0100 Subject: [PATCH] Fix/externalip node conflicts (#121) * Perform a single ExternalIP list call in node reconciliation * Handle reconciliation interval on node autoassigned eips - Add a minimum interval between reconciliations of a node to avoid conflicts - Add a requeue interval to reattach an eip to nodes that no longer have one * Set node reconciliation intervals configurable --- README.md | 16 ++-- controllers/externalip_controller.go | 13 +-- controllers/node_controller.go | 103 ++++++++++++++-------- helm/kubestatic/Chart.yaml | 4 +- helm/kubestatic/templates/deployment.yaml | 2 + helm/kubestatic/values.yaml | 6 ++ main.go | 25 ++++-- 7 files changed, 107 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 2dc795c..a075b48 100644 --- a/README.md +++ b/README.md @@ -74,13 +74,15 @@ If you want to automatically attach an external IP to certain nodes of your clus ### Optional args The kubestatic container takes as argument the parameters below. -| Key | Description | Default | -| ------------------------- | --------------------------------------------------------------------------------------------------------------------- | ------- | -| cloud-provider | Which cloud provider to deploy to. Available values: aws | "" | -| metrics-bind-address | The address the metric endpoint binds to. | :8080 | -| health-probe-bind-address | The address the probe endpoint binds to. | :8081 | -| leader-elect | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. | `false` | -| prevent-eip-deallocation | Prevent EIP deallocation on nodes auto-assigned ExternalIPs. | `false` | +| Key | Description | Default | +| ------------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------- | +| cloud-provider | Which cloud provider to deploy to. Available values: aws | "" | +| metrics-bind-address | The address the metric endpoint binds to. | :8080 | +| health-probe-bind-address | The address the probe endpoint binds to. | :8081 | +| leader-elect | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. | `false` | +| prevent-eip-deallocation | Prevent EIP deallocation on nodes auto-assigned ExternalIPs. | `false` | +| node-min-reconciliation-interval | The minimum duration to wait between two reconciliations for the same node. | 10s | +| node-reconciliation-requeue-interval | The duration for which nodes are automatically reconciled. | 1m | ## License diff --git a/controllers/externalip_controller.go b/controllers/externalip_controller.go index 5a07f76..98aa8d6 100644 --- a/controllers/externalip_controller.go +++ b/controllers/externalip_controller.go @@ -351,18 +351,19 @@ func (r *ExternalIPReconciler) SetupWithManager(mgr ctrl.Manager) error { log := r.Log.WithName("nodemapper") node := o.(*corev1.Node) - // List ExternalIPs that matches node name - log.V(1).Info("List all ExternalIP for node") + log.V(1).Info("List all ExternalIP") externalIPs := &v1alpha1.ExternalIPList{} - if err := r.Client.List(ctx, externalIPs, client.MatchingFields{externalIPNodeNameField: node.Name}); err != nil { + if err := r.Client.List(ctx, externalIPs); err != nil { log.Error(err, "Unable to list ExternalIP resources", "nodeName", node.Name) return []reconcile.Request{} } // Reconcile each matching ExternalIP - res := make([]reconcile.Request, len(externalIPs.Items)) - for i, e := range externalIPs.Items { - res[i] = reconcile.Request{NamespacedName: types.NamespacedName{Name: e.Name}} + res := []reconcile.Request{} + for _, eip := range externalIPs.Items { + if eip.Spec.NodeName == node.Name { + res = append(res, reconcile.Request{NamespacedName: types.NamespacedName{Name: eip.Name}}) + } } return res }), diff --git a/controllers/node_controller.go b/controllers/node_controller.go index b262808..6b301ae 100644 --- a/controllers/node_controller.go +++ b/controllers/node_controller.go @@ -18,12 +18,14 @@ package controllers import ( "context" + "time" "github.com/go-logr/logr" "github.com/google/uuid" "github.com/quortex/kubestatic/api/v1alpha1" "github.com/quortex/kubestatic/pkg/helper" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" @@ -40,16 +42,17 @@ const ( externalIPAutoAssignLabel = "kubestatic.quortex.io/externalip-auto-assign" // externalIPLabel is the key for auto externalIP label (the externalIP a pod should have) externalIPLabel = "kubestatic.quortex.io/externalip" - // externalIPNodeNameField is the nodeName field in ExternalIP resource - externalIPNodeNameField = ".spec.nodeName" ) // NodeReconciler reconciles a Node object type NodeReconciler struct { client.Client - Log logr.Logger - Scheme *runtime.Scheme - PreventEIPDeallocation bool + Log logr.Logger + Scheme *runtime.Scheme + PreventEIPDeallocation bool + MinReconciliationInterval time.Duration + ReconciliationRequeueInterval time.Duration + lastReconciliation map[string]time.Time } //+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;update;patch @@ -64,41 +67,54 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. log.V(1).Info("Node reconciliation started") defer log.V(1).Info("Node reconciliation done") + node := &corev1.Node{} + if err := r.Get(ctx, req.NamespacedName, node); err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Return and don't requeue + log.Info("ExternalIP resource not found. Ignoring since object must be deleted") + delete(r.lastReconciliation, req.Name) + return ctrl.Result{}, nil + } + // Error reading the object - requeue the request. + log.Error(err, "Failed to get Node") + return ctrl.Result{}, err + } + + // Store reconciliation time to handle reconciliation interval + r.lastReconciliation[req.Name] = time.Now() + // List auto assigned ExternalIPs for reconciled node log.V(1).Info("List all ExternalIPs for node") externalIPs := &v1alpha1.ExternalIPList{} if err := r.Client.List( ctx, externalIPs, - client.MatchingFields{externalIPNodeNameField: req.Name}, - client.MatchingLabels{externalIPAutoAssignLabel: "true"}, ); err != nil { log.Error(err, "Unable to list ExternalIP resources", "nodeName", req.Name) return ctrl.Result{}, err } - // Already existing ExternalIPs for this node, end reconciliation - if len(externalIPs.Items) > 0 { - log.V(1).Info("Already associated ExternalIP, aborting") - return ctrl.Result{}, nil - } - - // List orphaned auto assigned ExternalIPs to reuse it - log.V(1).Info("Listing orphaned auto assigned ExternalIPs") - if err := r.Client.List( - ctx, - externalIPs, - client.MatchingFields{externalIPNodeNameField: ""}, - client.MatchingLabels{externalIPAutoAssignLabel: "true"}, - ); err != nil { - log.Error(err, "Unable to list ExternalIP resources", "nodeName", "") - return ctrl.Result{}, err + // Check for existing eip and filter orphaned ones + orphanedEIPs := []v1alpha1.ExternalIP{} + for _, eip := range externalIPs.Items { + if eip.Labels[externalIPAutoAssignLabel] != "true" { + continue + } + // Already existing ExternalIPs for this node, end reconciliation + if eip.Spec.NodeName == req.Name { + log.V(1).Info("Already associated ExternalIP, aborting") + return ctrl.Result{RequeueAfter: r.ReconciliationRequeueInterval}, nil + } + if eip.Spec.NodeName == "" { + orphanedEIPs = append(orphanedEIPs, eip) + } } // Some orphaned auto assigned ExternalIPs, check which one to reuse - if len(externalIPs.Items) > 0 { + if len(orphanedEIPs) > 0 { // List pods that should be scheduled on orphaned ExternalIPs - publicIPAddresses := publicIPAddresses(externalIPs.Items) + publicIPAddresses := publicIPAddresses(orphanedEIPs) requirement, err := labels.NewRequirement(externalIPLabel, selection.In, publicIPAddresses) if err != nil { @@ -115,14 +131,17 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. } // We get the most referenced ExternalIP ore reuse the first one arbitrarily - externalIP := getMostReferencedIP(podList.Items, externalIPs.Items) + externalIP := getMostReferencedIP(podList.Items, orphanedEIPs) if externalIP == nil { - externalIP = &externalIPs.Items[0] + externalIP = &orphanedEIPs[0] log.V(1).Info("No used ExternalIP found, fallback on using the first") } externalIP.Spec.NodeName = req.Name log.V(1).Info("Associating ExternalIP to node", "externalIP", externalIP.Name) - return ctrl.Result{}, r.Update(ctx, externalIP) + if err := r.Update(ctx, externalIP); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: r.ReconciliationRequeueInterval}, nil } // No ExternalIP to reuse, creating a new one. @@ -142,19 +161,12 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, err } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: r.ReconciliationRequeueInterval}, nil } // SetupWithManager sets up the controller with the Manager. func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error { - // Index ExternalIP NodeName to list only ExternalIPs assigned to Node. - if err := mgr.GetFieldIndexer().IndexField(context.Background(), &v1alpha1.ExternalIP{}, externalIPNodeNameField, func(o client.Object) []string { - externalIP := o.(*v1alpha1.ExternalIP) - return []string{externalIP.Spec.NodeName} - }); err != nil { - return err - } - + r.lastReconciliation = map[string]time.Time{} return ctrl.NewControllerManagedBy(mgr). For(&corev1.Node{}, r.nodeReconciliationPredicates()). Complete(r) @@ -178,5 +190,20 @@ func (r *NodeReconciler) nodeReconciliationPredicates() builder.Predicates { // shouldReconcileNode returns if given Node should be reconciled by the controller. func (r *NodeReconciler) shouldReconcileNode(obj *corev1.Node) bool { // We should consider reconciliation for nodes with automatic IP assignment label. - return helper.ContainsElements(obj.ObjectMeta.Labels, map[string]string{externalIPAutoAssignLabel: "true"}) + if !helper.ContainsElements(obj.ObjectMeta.Labels, map[string]string{externalIPAutoAssignLabel: "true"}) { + return false + } + + // In the case of close consecutive reconciliations for the same Node, if an + // ExternalIP is created for this node the cached ExternalIP list may not have been + // notified of the fact that there is a new ExternalIP when the following + // reconciliation for this Node occurs. + // This can lead to the creation of unwanted ExternalIPs for this Node. + // Prevent frequent reconciliations is a workaround to ensure that the list of + // cached ExternalIPs is the correct one. + lastRec, ok := r.lastReconciliation[obj.Name] + if ok && time.Since(lastRec) < r.MinReconciliationInterval { + return false + } + return true } diff --git a/helm/kubestatic/Chart.yaml b/helm/kubestatic/Chart.yaml index 53a14ed..7918182 100644 --- a/helm/kubestatic/Chart.yaml +++ b/helm/kubestatic/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.9.2 +version: 0.10.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.9.2 +appVersion: 0.10.0 diff --git a/helm/kubestatic/templates/deployment.yaml b/helm/kubestatic/templates/deployment.yaml index 430581a..7289996 100644 --- a/helm/kubestatic/templates/deployment.yaml +++ b/helm/kubestatic/templates/deployment.yaml @@ -38,6 +38,8 @@ spec: - --metrics-bind-address={{ ternary "127.0.0.1:8080" "0.0.0.0:8080" $.Values.kubeRBACProxy.enabled }} - --leader-elect - --cloud-provider={{ required "A valid .Values.cloudProvider entry required!" $.Values.cloudProvider }} + - --node-min-reconciliation-interval={{ $.Values.nodeMinReconciliationInterval }} + - --node-reconciliation-requeue-interval={{ $.Values.nodeReconciliationRequeueInterval }} {{- if $.Values.preventEIPDeallocation }} - --prevent-eip-deallocation {{- end }} diff --git a/helm/kubestatic/values.yaml b/helm/kubestatic/values.yaml index 957d9cd..e0785fc 100644 --- a/helm/kubestatic/values.yaml +++ b/helm/kubestatic/values.yaml @@ -8,6 +8,12 @@ cloudProvider: aws # -- Prevent EIP deallocation on nodes auto-assigned ExternalIPs. preventEIPDeallocation: false +# -- The minimum duration to wait between two reconciliations for the same node. +nodeMinReconciliationInterval: 10s + +# -- The duration for which nodes are automatically reconciled. +nodeReconciliationRequeueInterval: 1m + aws: # -- the region in which the cluster resides. region: "" diff --git a/main.go b/main.go index 6732309..ac52922 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "strings" + "time" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. @@ -62,11 +63,13 @@ func init() { func main() { var ( - fMetricsAddr string - fEnableLeaderElection bool - fProbeAddr string - fCloudProvider string - fPreventEIPDeallocation bool + fMetricsAddr string + fEnableLeaderElection bool + fProbeAddr string + fCloudProvider string + fPreventEIPDeallocation bool + fNodeMinReconciliationInterval time.Duration + fNodeReconciliationRequeueInterval time.Duration ) flag.StringVar(&fMetricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&fProbeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") @@ -75,6 +78,8 @@ func main() { "Enabling this will ensure there is only one active controller manager.") flag.StringVar(&fCloudProvider, "cloud-provider", "aws", "Cloud provider type. Available values: ["+strings.Join(availableProviders, ",")+"]") flag.BoolVar(&fPreventEIPDeallocation, "prevent-eip-deallocation", false, "Prevent EIP deallocation on nodes auto-assigned ExternalIPs.") + flag.DurationVar(&fNodeMinReconciliationInterval, "node-min-reconciliation-interval", 10*time.Second, "The minimum duration to wait between two reconciliations for the same node.") + flag.DurationVar(&fNodeReconciliationRequeueInterval, "node-reconciliation-requeue-interval", 1*time.Minute, "The duration for which nodes are automatically reconciled.") opts := zap.Options{} opts.BindFlags(flag.CommandLine) flag.Parse() @@ -120,10 +125,12 @@ func main() { os.Exit(1) } if err = (&controllers.NodeReconciler{ - Client: mgr.GetClient(), - Log: ctrl.Log.WithName("controllers").WithName("Node"), - Scheme: mgr.GetScheme(), - PreventEIPDeallocation: fPreventEIPDeallocation, + Client: mgr.GetClient(), + Log: ctrl.Log.WithName("controllers").WithName("Node"), + Scheme: mgr.GetScheme(), + PreventEIPDeallocation: fPreventEIPDeallocation, + MinReconciliationInterval: fNodeMinReconciliationInterval, + ReconciliationRequeueInterval: fNodeReconciliationRequeueInterval, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Node") os.Exit(1)