From 2c4aee6bd0822e7d4c888fd45d2123288d6f1cfb Mon Sep 17 00:00:00 2001 From: enxebre Date: Thu, 12 Dec 2024 13:03:12 +0100 Subject: [PATCH] Let the HO manage the Karpenter Operator Deployment The Karpenter Operator Deployment is fully OCP version agnostic; it is applied by the HO and its binary lives within the HO image --- .../hostedcontrolplane_controller.go | 11 ++ .../hostedcluster/hostedcluster_controller.go | 5 + .../controllers/hostedcluster/karpenter.go | 116 ++++++++++++++++++ .../controllers/nodepool/token.go | 23 ++++ 4 files changed, 155 insertions(+) create mode 100644 hypershift-operator/controllers/hostedcluster/karpenter.go diff --git a/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go b/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go index 90d952fe7f..fe233365ed 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go +++ b/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go @@ -5335,6 +5335,17 @@ func createAWSDefaultSecurityGroup(ctx context.Context, ec2Client ec2iface.EC2AP Value: awssdk.String(awsSecurityGroupName(infraID)), }) } + + if hcp.Spec.AutoNode != nil && hcp.Spec.AutoNode.Provisioner.Name == hyperv1.ProvisionerKarpeneter && + hcp.Spec.AutoNode.Provisioner.Karpenter.Platform == hyperv1.AWSPlatform { + if !tagKeys.Has("karpenter.sh/discovery") { + tags = append(tags, &ec2.Tag{ + Key: awssdk.String("karpenter.sh/discovery"), + Value: awssdk.String(infraID), + }) + } + } + createSGResult, err := ec2Client.CreateSecurityGroup(&ec2.CreateSecurityGroupInput{ GroupName: awssdk.String(awsSecurityGroupName(infraID)), Description: awssdk.String("default worker security group"), diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index 484d0e697b..a74b2bfcdb 100644 --- 
a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -1757,6 +1757,10 @@ func (r *HostedClusterReconciler) reconcile(ctx context.Context, req ctrl.Reques } } + if err := r.reconcileKarpenterOperator(ctx, createOrUpdate, hcluster, hcp, r.HypershiftOperatorImage, controlPlaneOperatorImage); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to reconcile karpenter operator: %w", err) + } + // Reconcile the Ignition server if !controlplaneOperatorManagesIgnitionServer { releaseInfo, err := r.lookupReleaseImage(ctx, hcluster, releaseProvider) @@ -1955,6 +1959,7 @@ func reconcileHostedControlPlane(hcp *hyperv1.HostedControlPlane, hcluster *hype hcp.Spec.PausedUntil = hcluster.Spec.PausedUntil hcp.Spec.OLMCatalogPlacement = hcluster.Spec.OLMCatalogPlacement hcp.Spec.Autoscaling = hcluster.Spec.Autoscaling + hcp.Spec.AutoNode = hcluster.Spec.AutoNode hcp.Spec.NodeSelector = hcluster.Spec.NodeSelector hcp.Spec.Tolerations = hcluster.Spec.Tolerations hcp.Spec.Labels = hcluster.Spec.Labels diff --git a/hypershift-operator/controllers/hostedcluster/karpenter.go b/hypershift-operator/controllers/hostedcluster/karpenter.go new file mode 100644 index 0000000000..e165e831eb --- /dev/null +++ b/hypershift-operator/controllers/hostedcluster/karpenter.go @@ -0,0 +1,116 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package hostedcluster + +import ( + "context" + "fmt" + + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + karpenteroperatormanifest "github.com/openshift/hypershift/karpenter-operator/manifests" + "github.com/openshift/hypershift/support/upsert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" +) + +func (r *HostedClusterReconciler) reconcileKarpenterOperator(ctx context.Context, createOrUpdate upsert.CreateOrUpdateFN, hcluster *hyperv1.HostedCluster, hcp *hyperv1.HostedControlPlane, hypershiftOperatorImage, controlPlaneOperatorImage string) error { + if hcluster.Spec.AutoNode == nil || hcluster.Spec.AutoNode.Provisioner.Name != hyperv1.ProvisionerKarpeneter || + hcluster.Spec.AutoNode.Provisioner.Karpenter.Platform != hyperv1.AWSPlatform { + return nil + } + + // Generate configMap with KubeletConfig to register Nodes with karpenter expected taint. + taintConfigName := "set-karpenter-taint" + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: taintConfigName, + Namespace: hcluster.Namespace, + }, + } + + kubeletConfig := `apiVersion: machineconfiguration.openshift.io/v1 +kind: KubeletConfig +metadata: + name: set-karpenter-taint +spec: + kubeletConfig: + registerWithTaints: + - key: "karpenter.sh/unregistered" + value: "true" + effect: "NoExecute"` + + _, err := createOrUpdate(ctx, r.Client, configMap, func() error { + configMap.Data = map[string]string{ + "config": kubeletConfig, + } + return nil + }) + if err != nil { + return fmt.Errorf("failed to create configmap: %w", err) + } + + // Manage a NodePool to generate userData for Karpenter instances + // TODO(alberto): consider invoking the token library to manage the karpenter userdata programmatically, + // instead of via NodePool API. 
+ nodePool := &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "karpenter", + Namespace: hcluster.Namespace, + }, + } + _, err = createOrUpdate(ctx, r.Client, nodePool, func() error { + nodePool.Spec = hyperv1.NodePoolSpec{ + ClusterName: hcluster.Name, + Replicas: ptr.To(int32(0)), + Release: hcluster.Spec.Release, + Config: []corev1.LocalObjectReference{ + { + Name: taintConfigName, + }, + }, + Management: hyperv1.NodePoolManagement{ + UpgradeType: hyperv1.UpgradeTypeReplace, + Replace: &hyperv1.ReplaceUpgrade{ + Strategy: hyperv1.UpgradeStrategyRollingUpdate, + RollingUpdate: &hyperv1.RollingUpdate{ + MaxUnavailable: ptr.To(intstr.FromInt(0)), + MaxSurge: ptr.To(intstr.FromInt(1)), + }, + }, + AutoRepair: false, + }, + Platform: hyperv1.NodePoolPlatform{ + Type: hyperv1.AWSPlatform, + AWS: &hyperv1.AWSNodePoolPlatform{ + InstanceType: "m5.large", + Subnet: hyperv1.AWSResourceReference{ + ID: ptr.To("subnet-none"), + }, + }, + }, + } + return nil + }) + if err != nil { + return fmt.Errorf("failed to create nodepool: %w", err) + } + // TODO(alberto): Ensure deletion if autoNode is disabled. + + // Run the karpenter Operator to manage CRs on the management and guest side. + if err := karpenteroperatormanifest.ReconcileKarpenterOperator(ctx, createOrUpdate, r.Client, hypershiftOperatorImage, controlPlaneOperatorImage, hcp); err != nil { + return err + } + return nil +} diff --git a/hypershift-operator/controllers/nodepool/token.go b/hypershift-operator/controllers/nodepool/token.go index 58f8cfbb3b..d0c4254ea2 100644 --- a/hypershift-operator/controllers/nodepool/token.go +++ b/hypershift-operator/controllers/nodepool/token.go @@ -60,6 +60,7 @@ type userData struct { caCert []byte ignitionServerEndpoint string proxy *configv1.Proxy + ami string } // NewToken is the contract to create a new Token struct. 
@@ -124,10 +125,19 @@ func NewToken(ctx context.Context, configGenerator *ConfigGenerator, cpoCapabili proxy := globalconfig.ProxyConfig() globalconfig.ReconcileProxyConfigWithStatusFromHostedCluster(proxy, configGenerator.hostedCluster) + ami := "" + if configGenerator.hostedCluster.Spec.Platform.AWS != nil { + ami, err = defaultNodePoolAMI(configGenerator.hostedCluster.Spec.Platform.AWS.Region, configGenerator.nodePool.Spec.Arch, configGenerator.releaseImage) + if err != nil { + return nil, err + } + } + token.userData = &userData{ ignitionServerEndpoint: ignEndpoint, caCert: caCert, proxy: proxy, + ami: ami, } return token, nil @@ -335,6 +345,19 @@ func (t *Token) reconcileUserDataSecret(userDataSecret *corev1.Secret, token str userDataSecret.Annotations = make(map[string]string) } userDataSecret.Annotations[nodePoolAnnotation] = client.ObjectKeyFromObject(t.nodePool).String() + if userDataSecret.Labels == nil { + userDataSecret.Labels = make(map[string]string) + } + + if t.hostedCluster.Spec.AutoNode != nil && t.hostedCluster.Spec.AutoNode.Provisioner.Name == hyperv1.ProvisionerKarpeneter && + t.hostedCluster.Spec.AutoNode.Provisioner.Karpenter.Platform == hyperv1.AWSPlatform { + // TODO(alberto): prevent nodePool name collisions adding prefix to karpenter NodePool. + if t.nodePool.GetName() == "karpenter" { + userDataSecret.Labels[hyperv1.NodePoolLabel] = fmt.Sprintf("%s-%s", t.nodePool.Spec.ClusterName, t.nodePool.GetName()) + userDataSecret.Labels["hypershift.openshift.io/ami"] = t.userData.ami + } + + } encodedCACert := base64.StdEncoding.EncodeToString(t.userData.caCert) encodedToken := base64.StdEncoding.EncodeToString([]byte(token))