Skip to content

Commit

Permalink
Merge pull request #118770 from marquiz/devel/cgroup-driver-autoconfig
Browse files Browse the repository at this point in the history
kubelet: get cgroup driver config from CRI
  • Loading branch information
k8s-ci-robot authored Jul 17, 2023
2 parents c7e7eee + bfa62e0 commit 1fef8fd
Show file tree
Hide file tree
Showing 9 changed files with 1,157 additions and 426 deletions.
67 changes: 62 additions & 5 deletions cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
"github.com/coreos/go-systemd/v22/daemon"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"k8s.io/klog/v2"
"k8s.io/mount-utils"

Expand Down Expand Up @@ -76,6 +78,7 @@ import (
"k8s.io/component-base/version"
"k8s.io/component-base/version/verflag"
nodeutil "k8s.io/component-helpers/node/util"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
"k8s.io/kubernetes/cmd/kubelet/app/options"
"k8s.io/kubernetes/pkg/api/legacyscheme"
Expand Down Expand Up @@ -625,6 +628,17 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
runAuthenticatorCAReload(ctx.Done())
}

if err := kubelet.PreInitRuntimeService(&s.KubeletConfiguration, kubeDeps); err != nil {
return err
}

// Get cgroup driver setting from CRI
if utilfeature.DefaultFeatureGate.Enabled(features.KubeletCgroupDriverFromCRI) {
if err := getCgroupDriverFromCRI(ctx, s, kubeDeps); err != nil {
return err
}
}

var cgroupRoots []string
nodeAllocatableRoot := cm.NodeAllocatableRoot(s.CgroupRoot, s.CgroupsPerQOS, s.CgroupDriver)
cgroupRoots = append(cgroupRoots, nodeAllocatableRoot)
Expand Down Expand Up @@ -775,11 +789,6 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
klog.InfoS("Failed to ApplyOOMScoreAdj", "err", err)
}

err = kubelet.PreInitRuntimeService(&s.KubeletConfiguration, kubeDeps)
if err != nil {
return err
}

if err := RunKubelet(s, kubeDeps, s.RunOnce); err != nil {
return err
}
Expand Down Expand Up @@ -1282,3 +1291,51 @@ func newTracerProvider(s *options.KubeletServer) (oteltrace.TracerProvider, erro
}
return tp, nil
}

// getCgroupDriverFromCRI asks the CRI runtime for its configuration and, when
// the runtime reports a Linux cgroup driver, overwrites s.CgroupDriver with it.
//
// The RuntimeConfig call is attempted up to maxAttempts times with a fixed
// delay between attempts, on the assumption that most errors are transient
// (e.g. the runtime is still starting up). The loop stops as soon as a call
// succeeds. If the runtime answers with gRPC code Unimplemented, the function
// logs a hint and returns nil, leaving s.CgroupDriver untouched so the value
// from the kubelet configuration stays in effect.
//
// It returns the last RuntimeConfig error when all attempts fail, ctx.Err()
// when ctx is cancelled while waiting between attempts, and an error when the
// runtime reports a cgroup driver value this function does not know.
func getCgroupDriverFromCRI(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Dependencies) error {
	klog.V(4).InfoS("Getting CRI runtime configuration information")

	const (
		maxAttempts = 3
		retryDelay  = 2 * time.Second
	)

	var (
		runtimeConfig *runtimeapi.RuntimeConfigResponse
		err           error
	)
	// Retry a couple of times, hoping that any errors are transient.
	// Fail quickly on known, non transient errors.
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		runtimeConfig, err = kubeDeps.RemoteRuntimeService.RuntimeConfig(ctx)
		if err == nil {
			// Success: do not issue further, redundant requests.
			break
		}

		// Named st (not s) so the *options.KubeletServer parameter is not shadowed.
		if st, ok := status.FromError(err); ok && st.Code() == codes.Unimplemented {
			// CRI implementation doesn't support RuntimeConfig, fallback
			klog.InfoS("CRI implementation should be updated to support RuntimeConfig when KubeletCgroupDriverFromCRI feature gate has been enabled. Falling back to using cgroupDriver from kubelet config.")
			return nil
		}

		if attempt == maxAttempts {
			// No point in waiting when there is no attempt left.
			break
		}

		// We could introduce a backoff delay or jitter, but this is largely catching cases
		// where the runtime is still starting up and we request too early.
		// Give it a little more time, unless the caller has given up in the meantime.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(retryDelay):
		}
	}
	if err != nil {
		return err
	}

	// Calling GetLinux().GetCgroupDriver() won't segfault, but it will always default to systemd
	// which is not intended by the fields not being populated
	linuxConfig := runtimeConfig.GetLinux()
	if linuxConfig == nil {
		return nil
	}

	switch d := linuxConfig.GetCgroupDriver(); d {
	case runtimeapi.CgroupDriver_SYSTEMD:
		s.CgroupDriver = "systemd"
	case runtimeapi.CgroupDriver_CGROUPFS:
		s.CgroupDriver = "cgroupfs"
	default:
		return fmt.Errorf("runtime returned an unknown cgroup driver %d", d)
	}
	klog.InfoS("Using cgroup driver setting received from the CRI runtime", "cgroupDriver", s.CgroupDriver)
	return nil
}
13 changes: 13 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,17 @@ const (
// yet.
JobTrackingWithFinalizers featuregate.Feature = "JobTrackingWithFinalizers"

// owner: @marquiz
// kep: http://kep.k8s.io/4033
// alpha: v1.28
//
// Enable detection of the kubelet cgroup driver configuration option from
// the CRI. The CRI runtime also needs to support this feature in which
// case the kubelet will ignore the cgroupDriver (--cgroup-driver)
// configuration option. If runtime doesn't support it, the kubelet will
// fall back to using its cgroupDriver option.
KubeletCgroupDriverFromCRI featuregate.Feature = "KubeletCgroupDriverFromCRI"

// owner: @AkihiroSuda
// alpha: v1.22
//
Expand Down Expand Up @@ -1014,6 +1025,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

JobTrackingWithFinalizers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.28

KubeletCgroupDriverFromCRI: {Default: false, PreRelease: featuregate.Alpha},

KubeletInUserNamespace: {Default: false, PreRelease: featuregate.Alpha},

KubeletPodResources: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.28, remove in 1.30
Expand Down
10 changes: 10 additions & 0 deletions pkg/kubelet/cri/remote/fake/fake_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,13 @@ func (f *RemoteRuntime) ListPodSandboxMetrics(ctx context.Context, req *kubeapi.

return &kubeapi.ListPodSandboxMetricsResponse{PodMetrics: podMetrics}, nil
}

// RuntimeConfig serves the RuntimeConfig RPC of the fake runtime by delegating
// to the embedded RuntimeService. The incoming request is not inspected. On
// failure the response is nil and the delegate's error is passed through.
func (f *RemoteRuntime) RuntimeConfig(ctx context.Context, req *kubeapi.RuntimeConfigRequest) (*kubeapi.RuntimeConfigResponse, error) {
	config, cfgErr := f.RuntimeService.RuntimeConfig(ctx)
	if cfgErr == nil {
		return config, nil
	}
	return nil, cfgErr
}
15 changes: 15 additions & 0 deletions pkg/kubelet/cri/remote/remote_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -865,3 +865,18 @@ func (r *remoteRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*ru

return resp.GetPodMetrics(), nil
}

// RuntimeConfig requests the runtime's configuration through the runtime
// client. The call runs under a context derived from ctx and limited by
// r.timeout. A failed call is logged and returned with a nil response.
func (r *remoteRuntimeService) RuntimeConfig(ctx context.Context) (*runtimeapi.RuntimeConfigResponse, error) {
	timeoutCtx, cancelFn := context.WithTimeout(ctx, r.timeout)
	defer cancelFn()

	request := &runtimeapi.RuntimeConfigRequest{}
	config, rpcErr := r.runtimeClient.RuntimeConfig(timeoutCtx, request)
	if rpcErr != nil {
		klog.ErrorS(rpcErr, "RuntimeConfig from runtime service failed")
		return nil, rpcErr
	}

	klog.V(10).InfoS("[RemoteRuntimeService] RuntimeConfigResponse", "linuxConfig", config.GetLinux())
	return config, nil
}
9 changes: 9 additions & 0 deletions pkg/kubelet/kuberuntime/instrumented_services.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,12 @@ func (in instrumentedRuntimeService) ListPodSandboxMetrics(ctx context.Context)
recordError(operation, err)
return out, err
}

// RuntimeConfig forwards the call to the wrapped runtime service. The call is
// reported under the operation name "runtime_config": the start time is handed
// to recordOperation when the method returns, and the resulting error (nil or
// not) is handed to recordError.
func (in instrumentedRuntimeService) RuntimeConfig(ctx context.Context) (*runtimeapi.RuntimeConfigResponse, error) {
	const operation = "runtime_config"

	// Capture the start time up front; the deferred call receives this value.
	startedAt := time.Now()
	defer recordOperation(operation, startedAt)

	resp, callErr := in.service.RuntimeConfig(ctx)
	recordError(operation, callErr)
	return resp, callErr
}
Loading

0 comments on commit 1fef8fd

Please sign in to comment.