Skip to content

Commit

Permalink
Fix/externalip node conflicts (#121)
Browse files Browse the repository at this point in the history
* Perform a single ExternalIP list call in node reconciliation

* Handle reconciliation interval on node autoassigned eips

- Add a minimum interval between reconciliations of a node to avoid conflicts
- Add a requeue interval to reattach an eip to nodes that no longer have one

* Set node reconciliation intervals configurable
  • Loading branch information
vincentmrg authored Feb 16, 2024
1 parent f69acdf commit 24d6582
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 62 deletions.
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,15 @@ If you want to automatically attach an external IP to certain nodes of your clus

### Optional args
The kubestatic container takes as argument the parameters below.
| Key | Description | Default |
| ------------------------- | --------------------------------------------------------------------------------------------------------------------- | ------- |
| cloud-provider | Which cloud provider to deploy to. Available values: aws | "" |
| metrics-bind-address | The address the metric endpoint binds to. | :8080 |
| health-probe-bind-address | The address the probe endpoint binds to. | :8081 |
| leader-elect | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. | `false` |
| prevent-eip-deallocation | Prevent EIP deallocation on nodes auto-assigned ExternalIPs. | `false` |
| Key | Description | Default |
| ------------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------- |
| cloud-provider | Which cloud provider to deploy to. Available values: aws | "" |
| metrics-bind-address | The address the metric endpoint binds to. | :8080 |
| health-probe-bind-address | The address the probe endpoint binds to. | :8081 |
| leader-elect | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. | `false` |
| prevent-eip-deallocation | Prevent EIP deallocation on nodes auto-assigned ExternalIPs. | `false` |
| node-min-reconciliation-interval | The minimum duration to wait between two reconciliations for the same node. | 10s |
| node-reconciliation-requeue-interval | The duration for which nodes are automatically reconciled. | 1m |


## License
Expand Down
13 changes: 7 additions & 6 deletions controllers/externalip_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,18 +351,19 @@ func (r *ExternalIPReconciler) SetupWithManager(mgr ctrl.Manager) error {
log := r.Log.WithName("nodemapper")
node := o.(*corev1.Node)

// List ExternalIPs that matches node name
log.V(1).Info("List all ExternalIP for node")
log.V(1).Info("List all ExternalIP")
externalIPs := &v1alpha1.ExternalIPList{}
if err := r.Client.List(ctx, externalIPs, client.MatchingFields{externalIPNodeNameField: node.Name}); err != nil {
if err := r.Client.List(ctx, externalIPs); err != nil {
log.Error(err, "Unable to list ExternalIP resources", "nodeName", node.Name)
return []reconcile.Request{}
}

// Reconcile each matching ExternalIP
res := make([]reconcile.Request, len(externalIPs.Items))
for i, e := range externalIPs.Items {
res[i] = reconcile.Request{NamespacedName: types.NamespacedName{Name: e.Name}}
res := []reconcile.Request{}
for _, eip := range externalIPs.Items {
if eip.Spec.NodeName == node.Name {
res = append(res, reconcile.Request{NamespacedName: types.NamespacedName{Name: eip.Name}})
}
}
return res
}),
Expand Down
103 changes: 65 additions & 38 deletions controllers/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ package controllers

import (
"context"
"time"

"github.com/go-logr/logr"
"github.com/google/uuid"
"github.com/quortex/kubestatic/api/v1alpha1"
"github.com/quortex/kubestatic/pkg/helper"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -40,16 +42,17 @@ const (
externalIPAutoAssignLabel = "kubestatic.quortex.io/externalip-auto-assign"
// externalIPLabel is the key for auto externalIP label (the externalIP a pod should have)
externalIPLabel = "kubestatic.quortex.io/externalip"
// externalIPNodeNameField is the nodeName field in ExternalIP resource
externalIPNodeNameField = ".spec.nodeName"
)

// NodeReconciler reconciles a Node object
type NodeReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
PreventEIPDeallocation bool
Log logr.Logger
Scheme *runtime.Scheme
PreventEIPDeallocation bool
MinReconciliationInterval time.Duration
ReconciliationRequeueInterval time.Duration
lastReconciliation map[string]time.Time
}

//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;update;patch
Expand All @@ -64,41 +67,54 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
log.V(1).Info("Node reconciliation started")
defer log.V(1).Info("Node reconciliation done")

node := &corev1.Node{}
if err := r.Get(ctx, req.NamespacedName, node); err != nil {
if errors.IsNotFound(err) {
// Request object not found, could have been deleted after reconcile request.
// Return and don't requeue
log.Info("ExternalIP resource not found. Ignoring since object must be deleted")
delete(r.lastReconciliation, req.Name)
return ctrl.Result{}, nil
}
// Error reading the object - requeue the request.
log.Error(err, "Failed to get Node")
return ctrl.Result{}, err
}

// Store reconciliation time to handle reconciliation interval
r.lastReconciliation[req.Name] = time.Now()

// List auto assigned ExternalIPs for reconciled node
log.V(1).Info("List all ExternalIPs for node")
externalIPs := &v1alpha1.ExternalIPList{}
if err := r.Client.List(
ctx,
externalIPs,
client.MatchingFields{externalIPNodeNameField: req.Name},
client.MatchingLabels{externalIPAutoAssignLabel: "true"},
); err != nil {
log.Error(err, "Unable to list ExternalIP resources", "nodeName", req.Name)
return ctrl.Result{}, err
}

// Already existing ExternalIPs for this node, end reconciliation
if len(externalIPs.Items) > 0 {
log.V(1).Info("Already associated ExternalIP, aborting")
return ctrl.Result{}, nil
}

// List orphaned auto assigned ExternalIPs to reuse it
log.V(1).Info("Listing orphaned auto assigned ExternalIPs")
if err := r.Client.List(
ctx,
externalIPs,
client.MatchingFields{externalIPNodeNameField: ""},
client.MatchingLabels{externalIPAutoAssignLabel: "true"},
); err != nil {
log.Error(err, "Unable to list ExternalIP resources", "nodeName", "")
return ctrl.Result{}, err
// Check for existing eip and filter orphaned ones
orphanedEIPs := []v1alpha1.ExternalIP{}
for _, eip := range externalIPs.Items {
if eip.Labels[externalIPAutoAssignLabel] != "true" {
continue
}
// Already existing ExternalIPs for this node, end reconciliation
if eip.Spec.NodeName == req.Name {
log.V(1).Info("Already associated ExternalIP, aborting")
return ctrl.Result{RequeueAfter: r.ReconciliationRequeueInterval}, nil
}
if eip.Spec.NodeName == "" {
orphanedEIPs = append(orphanedEIPs, eip)
}
}

// Some orphaned auto assigned ExternalIPs, check which one to reuse
if len(externalIPs.Items) > 0 {
if len(orphanedEIPs) > 0 {
// List pods that should be scheduled on orphaned ExternalIPs
publicIPAddresses := publicIPAddresses(externalIPs.Items)
publicIPAddresses := publicIPAddresses(orphanedEIPs)

requirement, err := labels.NewRequirement(externalIPLabel, selection.In, publicIPAddresses)
if err != nil {
Expand All @@ -115,14 +131,17 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
}

// We get the most referenced ExternalIP ore reuse the first one arbitrarily
externalIP := getMostReferencedIP(podList.Items, externalIPs.Items)
externalIP := getMostReferencedIP(podList.Items, orphanedEIPs)
if externalIP == nil {
externalIP = &externalIPs.Items[0]
externalIP = &orphanedEIPs[0]
log.V(1).Info("No used ExternalIP found, fallback on using the first")
}
externalIP.Spec.NodeName = req.Name
log.V(1).Info("Associating ExternalIP to node", "externalIP", externalIP.Name)
return ctrl.Result{}, r.Update(ctx, externalIP)
if err := r.Update(ctx, externalIP); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: r.ReconciliationRequeueInterval}, nil
}

// No ExternalIP to reuse, creating a new one.
Expand All @@ -142,19 +161,12 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
return ctrl.Result{RequeueAfter: r.ReconciliationRequeueInterval}, nil
}

// SetupWithManager sets up the controller with the Manager.
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error {
// Index ExternalIP NodeName to list only ExternalIPs assigned to Node.
if err := mgr.GetFieldIndexer().IndexField(context.Background(), &v1alpha1.ExternalIP{}, externalIPNodeNameField, func(o client.Object) []string {
externalIP := o.(*v1alpha1.ExternalIP)
return []string{externalIP.Spec.NodeName}
}); err != nil {
return err
}

r.lastReconciliation = map[string]time.Time{}
return ctrl.NewControllerManagedBy(mgr).
For(&corev1.Node{}, r.nodeReconciliationPredicates()).
Complete(r)
Expand All @@ -178,5 +190,20 @@ func (r *NodeReconciler) nodeReconciliationPredicates() builder.Predicates {
// shouldReconcileNode returns if given Node should be reconciled by the controller.
func (r *NodeReconciler) shouldReconcileNode(obj *corev1.Node) bool {
// We should consider reconciliation for nodes with automatic IP assignment label.
return helper.ContainsElements(obj.ObjectMeta.Labels, map[string]string{externalIPAutoAssignLabel: "true"})
if !helper.ContainsElements(obj.ObjectMeta.Labels, map[string]string{externalIPAutoAssignLabel: "true"}) {
return false
}

// In the case of close consecutive reconciliations for the same Node, if an
// ExternalIP is created for this node the cached ExternalIP list may not have been
// notified of the fact that there is a new ExternalIP when the following
// reconciliation for this Node occurs.
// This can lead to the creation of unwanted ExternalIPs for this Node.
// Prevent frequent reconciliations is a workaround to ensure that the list of
// cached ExternalIPs is the correct one.
lastRec, ok := r.lastReconciliation[obj.Name]
if ok && time.Since(lastRec) < r.MinReconciliationInterval {
return false
}
return true
}
4 changes: 2 additions & 2 deletions helm/kubestatic/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.9.2
version: 0.10.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: 0.9.2
appVersion: 0.10.0
2 changes: 2 additions & 0 deletions helm/kubestatic/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ spec:
- --metrics-bind-address={{ ternary "127.0.0.1:8080" "0.0.0.0:8080" $.Values.kubeRBACProxy.enabled }}
- --leader-elect
- --cloud-provider={{ required "A valid .Values.cloudProvider entry required!" $.Values.cloudProvider }}
- --node-min-reconciliation-interval={{ $.Values.nodeMinReconciliationInterval }}
- --node-reconciliation-requeue-interval={{ $.Values.nodeReconciliationRequeueInterval }}
{{- if $.Values.preventEIPDeallocation }}
- --prevent-eip-deallocation
{{- end }}
Expand Down
6 changes: 6 additions & 0 deletions helm/kubestatic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ cloudProvider: aws
# -- Prevent EIP deallocation on nodes auto-assigned ExternalIPs.
preventEIPDeallocation: false

# -- The minimum duration to wait between two reconciliations for the same node.
nodeMinReconciliationInterval: 10s

# -- The duration for which nodes are automatically reconciled.
nodeReconciliationRequeueInterval: 1m

aws:
# -- the region in which the cluster resides.
region: ""
Expand Down
25 changes: 16 additions & 9 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"os"
"strings"
"time"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
Expand Down Expand Up @@ -62,11 +63,13 @@ func init() {

func main() {
var (
fMetricsAddr string
fEnableLeaderElection bool
fProbeAddr string
fCloudProvider string
fPreventEIPDeallocation bool
fMetricsAddr string
fEnableLeaderElection bool
fProbeAddr string
fCloudProvider string
fPreventEIPDeallocation bool
fNodeMinReconciliationInterval time.Duration
fNodeReconciliationRequeueInterval time.Duration
)
flag.StringVar(&fMetricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&fProbeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
Expand All @@ -75,6 +78,8 @@ func main() {
"Enabling this will ensure there is only one active controller manager.")
flag.StringVar(&fCloudProvider, "cloud-provider", "aws", "Cloud provider type. Available values: ["+strings.Join(availableProviders, ",")+"]")
flag.BoolVar(&fPreventEIPDeallocation, "prevent-eip-deallocation", false, "Prevent EIP deallocation on nodes auto-assigned ExternalIPs.")
flag.DurationVar(&fNodeMinReconciliationInterval, "node-min-reconciliation-interval", 10*time.Second, "The minimum duration to wait between two reconciliations for the same node.")
flag.DurationVar(&fNodeReconciliationRequeueInterval, "node-reconciliation-requeue-interval", 1*time.Minute, "The duration for which nodes are automatically reconciled.")
opts := zap.Options{}
opts.BindFlags(flag.CommandLine)
flag.Parse()
Expand Down Expand Up @@ -120,10 +125,12 @@ func main() {
os.Exit(1)
}
if err = (&controllers.NodeReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("Node"),
Scheme: mgr.GetScheme(),
PreventEIPDeallocation: fPreventEIPDeallocation,
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("Node"),
Scheme: mgr.GetScheme(),
PreventEIPDeallocation: fPreventEIPDeallocation,
MinReconciliationInterval: fNodeMinReconciliationInterval,
ReconciliationRequeueInterval: fNodeReconciliationRequeueInterval,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Node")
os.Exit(1)
Expand Down

0 comments on commit 24d6582

Please sign in to comment.