-
Notifications
You must be signed in to change notification settings - Fork 40k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Certificate rotation for kubelet server certs. #45059
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -195,6 +195,9 @@ func (s *fileStore) Update(certData, keyData []byte) (*tls.Certificate, error) { | |
ts := time.Now().Format("2006-01-02-15-04-05") | ||
pemFilename := s.filename(ts) | ||
|
||
if err := os.MkdirAll(s.certDirectory, 0755); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Does it make sense to use something like 0700 since the contents are going to be sensitive? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can do 0700. It would make sense to me. I put 0755 because it matches the existing behavior when this same directory gets created via other code paths. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. Maybe leave it as is then. Probably better to change it everywhere in a different PR if we care about this. |
||
return nil, fmt.Errorf("could not create directory %q to store certificates: %v", s.certDirectory, err) | ||
} | ||
certPath := filepath.Join(s.certDirectory, pemFilename) | ||
|
||
f, err := os.OpenFile(certPath, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0600) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,9 @@ limitations under the License. | |
package kubelet | ||
|
||
import ( | ||
"crypto/tls" | ||
"crypto/x509" | ||
"crypto/x509/pkix" | ||
"fmt" | ||
"net" | ||
"net/http" | ||
|
@@ -51,14 +54,17 @@ import ( | |
"k8s.io/client-go/util/integer" | ||
"k8s.io/kubernetes/pkg/api" | ||
"k8s.io/kubernetes/pkg/api/v1" | ||
certificates "k8s.io/kubernetes/pkg/apis/certificates/v1beta1" | ||
"k8s.io/kubernetes/pkg/apis/componentconfig" | ||
componentconfigv1alpha1 "k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1" | ||
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" | ||
clientcertificates "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/certificates/v1beta1" | ||
corelisters "k8s.io/kubernetes/pkg/client/listers/core/v1" | ||
"k8s.io/kubernetes/pkg/cloudprovider" | ||
"k8s.io/kubernetes/pkg/features" | ||
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri" | ||
"k8s.io/kubernetes/pkg/kubelet/cadvisor" | ||
"k8s.io/kubernetes/pkg/kubelet/certificate" | ||
"k8s.io/kubernetes/pkg/kubelet/cm" | ||
"k8s.io/kubernetes/pkg/kubelet/config" | ||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||
|
@@ -304,6 +310,8 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub | |
hostname := nodeutil.GetHostname(hostnameOverride) | ||
// Query the cloud provider for our node name, default to hostname | ||
nodeName := types.NodeName(hostname) | ||
cloudIPs := []net.IP{} | ||
cloudNames := []string{} | ||
if kubeDeps.Cloud != nil { | ||
var err error | ||
instances, ok := kubeDeps.Cloud.Instances() | ||
|
@@ -317,6 +325,25 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub | |
} | ||
|
||
glog.V(2).Infof("cloud provider determined current node name to be %s", nodeName) | ||
|
||
if utilfeature.DefaultFeatureGate.Enabled(features.RotateKubeletServerCertificate) { | ||
nodeAddresses, err := instances.NodeAddresses(nodeName) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to get the addresses of the current instance from the cloud provider: %v", err) | ||
} | ||
for _, nodeAddress := range nodeAddresses { | ||
switch nodeAddress.Type { | ||
case v1.NodeExternalIP, v1.NodeInternalIP: | ||
ip := net.ParseIP(nodeAddress.Address) | ||
if ip != nil && !ip.IsLoopback() { | ||
cloudIPs = append(cloudIPs, ip) | ||
} | ||
case v1.NodeExternalDNS, v1.NodeInternalDNS, v1.NodeHostName: | ||
cloudNames = append(cloudNames, nodeAddress.Address) | ||
} | ||
} | ||
} | ||
|
||
} | ||
|
||
if kubeDeps.PodConfig == nil { | ||
|
@@ -654,6 +681,33 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub | |
|
||
klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet) | ||
|
||
if utilfeature.DefaultFeatureGate.Enabled(features.RotateKubeletServerCertificate) && kubeDeps.TLSOptions != nil { | ||
var ips []net.IP | ||
cfgAddress := net.ParseIP(kubeCfg.Address) | ||
if cfgAddress == nil || cfgAddress.IsUnspecified() { | ||
if localIPs, err := allLocalIPsWithoutLoopback(); err != nil { | ||
return nil, err | ||
} else { | ||
ips = localIPs | ||
} | ||
} else { | ||
ips = []net.IP{cfgAddress} | ||
} | ||
ips = append(ips, cloudIPs...) | ||
names := append([]string{klet.GetHostname(), hostnameOverride}, cloudNames...) | ||
klet.serverCertificateManager, err = initializeServerCertificateManager(klet.kubeClient, kubeCfg, klet.nodeName, ips, names) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to initialize certificate manager: %v", err) | ||
} | ||
kubeDeps.TLSOptions.Config.GetCertificate = func(*tls.ClientHelloInfo) (*tls.Certificate, error) { | ||
cert := klet.serverCertificateManager.Current() | ||
if cert == nil { | ||
return nil, fmt.Errorf("no certificate available") | ||
} | ||
return cert, nil | ||
} | ||
} | ||
|
||
klet.probeManager = prober.NewManager( | ||
klet.statusManager, | ||
klet.livenessManager, | ||
|
@@ -865,6 +919,9 @@ type Kubelet struct { | |
// Cached MachineInfo returned by cadvisor. | ||
machineInfo *cadvisorapi.MachineInfo | ||
|
||
// Handles certificate rotations. | ||
serverCertificateManager certificate.Manager | ||
|
||
// Syncs pods statuses with apiserver; also used as a cache of statuses. | ||
statusManager status.Manager | ||
|
||
|
@@ -1037,6 +1094,62 @@ type Kubelet struct { | |
dockerLegacyService dockershim.DockerLegacyService | ||
} | ||
|
||
func initializeServerCertificateManager(kubeClient clientset.Interface, kubeCfg *componentconfig.KubeletConfiguration, nodeName types.NodeName, ips []net.IP, hostnames []string) (certificate.Manager, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd like to move this into its own package (since it's not dependent on anything in the kubelet) and import it, primarily to separate it from this mega package, but also to make it easier to consume from other contexts (like tools that want to provide their own certificate management in a similar form). Objections to a small refactor? I also want to do that for the client code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
var certSigningRequestClient clientcertificates.CertificateSigningRequestInterface | ||
if kubeClient != nil && kubeClient.Certificates() != nil { | ||
certSigningRequestClient = kubeClient.Certificates().CertificateSigningRequests() | ||
} | ||
certificateStore, err := certificate.NewFileStore( | ||
"kubelet-server", | ||
kubeCfg.CertDirectory, | ||
kubeCfg.CertDirectory, | ||
kubeCfg.TLSCertFile, | ||
kubeCfg.TLSPrivateKeyFile) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to initialize certificate store: %v", err) | ||
} | ||
return certificate.NewManager(&certificate.Config{ | ||
CertificateSigningRequestClient: certSigningRequestClient, | ||
Template: &x509.CertificateRequest{ | ||
Subject: pkix.Name{ | ||
CommonName: string(nodeName), | ||
Organization: []string{"system:nodes"}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this isn't what a serving cert for a node looks like... it need to have the node's IPs and DNS names as subject alt names... not sure what the subject should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CertificateRequest looks correct now. |
||
}, | ||
DNSNames: hostnames, | ||
IPAddresses: ips, | ||
}, | ||
Usages: []certificates.KeyUsage{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: might be worth having a helper in the certificates api package that returns usages for a TLS client cert, and one that returns usages for a TLS serving cert. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will follow on with this change. |
||
certificates.UsageKeyEncipherment, | ||
certificates.UsageServerAuth, | ||
certificates.UsageSigning, | ||
}, | ||
CertificateStore: certificateStore, | ||
}) | ||
} | ||
|
||
func allLocalIPsWithoutLoopback() ([]net.IP, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the kubelet takes an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
interfaces, err := net.Interfaces() | ||
if err != nil { | ||
return nil, fmt.Errorf("could not list network interfaces: %v", err) | ||
} | ||
var ips []net.IP | ||
for _, i := range interfaces { | ||
addresses, err := i.Addrs() | ||
if err != nil { | ||
return nil, fmt.Errorf("could not list the addresses for network interface %v: %v\n", i, err) | ||
} | ||
for _, address := range addresses { | ||
switch v := address.(type) { | ||
case *net.IPNet: | ||
if !v.IP.IsLoopback() { | ||
ips = append(ips, v.IP) | ||
} | ||
} | ||
} | ||
} | ||
return ips, nil | ||
} | ||
|
||
// setupDataDirs creates: | ||
// 1. the root directory | ||
// 2. the pods directory | ||
|
@@ -1100,25 +1213,30 @@ func (kl *Kubelet) StartGarbageCollection() { | |
// initializeModules will initialize internal modules that do not require the container runtime to be up. | ||
// Note that the modules here must not depend on modules that are not initialized here. | ||
func (kl *Kubelet) initializeModules() error { | ||
// Step 1: Prometheus metrics. | ||
// Prometheus metrics. | ||
metrics.Register(kl.runtimeCache) | ||
|
||
// Step 2: Setup filesystem directories. | ||
// Setup filesystem directories. | ||
if err := kl.setupDataDirs(); err != nil { | ||
return err | ||
} | ||
|
||
// Step 3: If the container logs directory does not exist, create it. | ||
// If the container logs directory does not exist, create it. | ||
if _, err := os.Stat(ContainerLogsDir); err != nil { | ||
if err := kl.os.MkdirAll(ContainerLogsDir, 0755); err != nil { | ||
glog.Errorf("Failed to create directory %q: %v", ContainerLogsDir, err) | ||
} | ||
} | ||
|
||
// Step 4: Start the image manager. | ||
// Start the image manager. | ||
kl.imageManager.Start() | ||
|
||
// Step 5: Start container manager. | ||
// Start the certificate manager. | ||
if utilfeature.DefaultFeatureGate.Enabled(features.RotateKubeletServerCertificate) { | ||
kl.serverCertificateManager.Start() | ||
} | ||
|
||
// Start container manager. | ||
node, err := kl.getNodeAnyWay() | ||
if err != nil { | ||
return fmt.Errorf("Kubelet failed to get node info: %v", err) | ||
|
@@ -1128,17 +1246,17 @@ func (kl *Kubelet) initializeModules() error { | |
return fmt.Errorf("Failed to start ContainerManager %v", err) | ||
} | ||
|
||
// Step 6: Start out of memory watcher. | ||
// Start out of memory watcher. | ||
if err := kl.oomWatcher.Start(kl.nodeRef); err != nil { | ||
return fmt.Errorf("Failed to start OOM watcher %v", err) | ||
} | ||
|
||
// Step 7: Initialize GPUs | ||
// Initialize GPUs | ||
if err := kl.gpuManager.Start(); err != nil { | ||
glog.Errorf("Failed to start gpuManager %v", err) | ||
} | ||
|
||
// Step 8: Start resource analyzer | ||
// Start resource analyzer | ||
kl.resourceAnalyzer.Start() | ||
|
||
return nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's worth thinking about how this will look once this feature reaches GA. I would still expect to be able to provide a kubelet with a serving cert/key and have it just use it without trying to self-rotate. At that point, would we want a GA feature gate that someone could disable? I'm a little unclear about the longevity of feature gates once they reach GA.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think feature gates should be a long-term option. If we want something to toggle this after it goes to GA, it should be a separate flag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I sort of agree. I'd expect people to opt into rotation/renewal even once this is GA, since it places requirements on deployments to set up signers backing the CSR API
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#46603
Does it seem reasonable to have one flag covering all the certificate rotation in the kubelet?