Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CPU manager static policy #51180

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/kubelet/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat
fs.BoolVar(&c.CgroupsPerQOS, "cgroups-per-qos", c.CgroupsPerQOS, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
fs.StringVar(&c.CgroupDriver, "cgroup-driver", c.CgroupDriver, "Driver that the kubelet uses to manipulate cgroups on the host. Possible values: 'cgroupfs', 'systemd'")
fs.StringVar(&c.CgroupRoot, "cgroup-root", c.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&c.CPUManagerPolicy, "cpu-manager-policy", c.CPUManagerPolicy, "<Warning: Alpha feature> CPU Manager policy to use. Possible values: 'none', 'static'. Default: 'none'")
fs.DurationVar(&c.CPUManagerReconcilePeriod.Duration, "cpu-manager-reconcile-period", c.CPUManagerReconcilePeriod.Duration, "<Warning: Alpha feature> CPU Manager reconciliation period. Examples: '10s', or '1m'. If not supplied, defaults to `NodeStatusUpdateFrequency`")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why should this be configurable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@derekwaynecarr asked for this tunable so that we can evaluate the perf effect of cgroupfs scribbling frequency

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vishh -- i wanted a way to measure its impact. if we want to remove the flag before release, we can evaluate that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vishh -- earlier iterations of the PR had it reconciling every 1s, which i worried was going to be expensive.

fs.StringVar(&c.ContainerRuntime, "container-runtime", c.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'.")
fs.DurationVar(&c.RuntimeRequestTimeout.Duration, "runtime-request-timeout", c.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later.")
fs.StringVar(&c.LockFilePath, "lock-file", c.LockFilePath, "<Warning: Alpha feature> The path to file for kubelet to use as a lock file.")
Expand Down
4 changes: 3 additions & 1 deletion cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,9 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
SystemReserved: systemReserved,
HardEvictionThresholds: hardEvictionThresholds,
},
ExperimentalQOSReserved: *experimentalQOSReserved,
ExperimentalQOSReserved: *experimentalQOSReserved,
ExperimentalCPUManagerPolicy: s.CPUManagerPolicy,
ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
},
s.FailSwapOn,
devicePluginEnabled,
Expand Down
7 changes: 7 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ const (
//
// Enable mount propagation of volumes.
MountPropagation utilfeature.Feature = "MountPropagation"

// owner: @ConnorDoyle
// alpha: v1.8
//
// Alternative container-level CPU affinity policies.
CPUManager utilfeature.Feature = "CPUManager"
)

func init() {
Expand Down Expand Up @@ -185,6 +191,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
TaintNodesByCondition: {Default: false, PreRelease: utilfeature.Alpha},
MountPropagation: {Default: false, PreRelease: utilfeature.Alpha},
ExpandPersistentVolumes: {Default: false, PreRelease: utilfeature.Alpha},
CPUManager: {Default: false, PreRelease: utilfeature.Alpha},

// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ type KubeletConfiguration struct {
RemoteRuntimeEndpoint string
// remoteImageEndpoint is the endpoint of remote image service
RemoteImageEndpoint string
// CPUManagerPolicy is the name of the policy to use.
CPUManagerPolicy string
// CPU Manager reconciliation period.
CPUManagerReconcilePeriod metav1.Duration
// runtimeRequestTimeout is the timeout for all runtime requests except long running
// requests - pull, logs, exec and attach.
// +optional
Expand Down
6 changes: 6 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/v1alpha1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.NodeStatusUpdateFrequency == zeroDuration {
obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
}
if obj.CPUManagerPolicy == "" {
obj.CPUManagerPolicy = "none"
}
if obj.CPUManagerReconcilePeriod == zeroDuration {
obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency
}
if obj.OOMScoreAdj == nil {
temp := int32(qos.KubeletOOMScoreAdj)
obj.OOMScoreAdj = &temp
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ type KubeletConfiguration struct {
RemoteRuntimeEndpoint string `json:"remoteRuntimeEndpoint"`
// remoteImageEndpoint is the endpoint of remote image service
RemoteImageEndpoint string `json:"remoteImageEndpoint"`
// CPUManagerPolicy is the name of the policy to use.
CPUManagerPolicy string `json:"cpuManagerPolicy"`
// CPU Manager reconciliation period.
CPUManagerReconcilePeriod metav1.Duration `json:"cpuManagerReconcilePeriod"`
// runtimeRequestTimeout is the timeout for all runtime requests except long running
// requests - pull, logs, exec and attach.
RuntimeRequestTimeout metav1.Duration `json:"runtimeRequestTimeout"`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_kubeletconfig_KubeletConfigura
out.ContainerRuntime = in.ContainerRuntime
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
out.RemoteImageEndpoint = in.RemoteImageEndpoint
out.CPUManagerPolicy = in.CPUManagerPolicy
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.ExperimentalMounterPath = in.ExperimentalMounterPath
if err := v1.Convert_Pointer_string_To_string(&in.LockFilePath, &out.LockFilePath, s); err != nil {
Expand Down Expand Up @@ -390,6 +392,8 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigura
out.ContainerRuntime = in.ContainerRuntime
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
out.RemoteImageEndpoint = in.RemoteImageEndpoint
out.CPUManagerPolicy = in.CPUManagerPolicy
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.ExperimentalMounterPath = in.ExperimentalMounterPath
if err := v1.Convert_string_To_Pointer_string(&in.LockFilePath, &out.LockFilePath, s); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
**out = **in
}
}
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
if in.LockFilePath != nil {
in, out := &in.LockFilePath, &out.LockFilePath
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
out.NodeStatusUpdateFrequency = in.NodeStatusUpdateFrequency
out.ImageMinimumGCAge = in.ImageMinimumGCAge
out.VolumeStatsAggPeriod = in.VolumeStatsAggPeriod
out.CPUManagerReconcilePeriod = in.CPUManagerReconcilePeriod
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
if in.RegisterWithTaints != nil {
in, out := &in.RegisterWithTaints, &out.RegisterWithTaints
Expand Down
7 changes: 7 additions & 0 deletions pkg/kubelet/cm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ go_library(
"container_manager_unsupported.go",
"device_plugin_handler.go",
"device_plugin_handler_stub.go",
"fake_internal_container_lifecycle.go",
"helpers_unsupported.go",
"internal_container_lifecycle.go",
"pod_container_manager_stub.go",
"pod_container_manager_unsupported.go",
"types.go",
Expand All @@ -29,18 +31,23 @@ go_library(
}),
visibility = ["//visibility:public"],
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/cri:go_default_library",
"//pkg/kubelet/apis/deviceplugin/v1alpha1:go_default_library",
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
"//pkg/kubelet/cadvisor:go_default_library",
"//pkg/kubelet/cm/cpumanager:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/deviceplugin:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/status:go_default_library",
"//pkg/util/mount:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
Expand Down
12 changes: 10 additions & 2 deletions pkg/kubelet/cm/container_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ limitations under the License.
package cm

import (
"time"

"k8s.io/apimachinery/pkg/util/sets"
// TODO: Migrate kubelet to either use its own internal objects or client library.
"k8s.io/api/core/v1"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/status"

"fmt"
"strconv"
Expand All @@ -36,7 +40,7 @@ type ContainerManager interface {
// Runs the container manager's housekeeping.
// - Ensures that the Docker daemon is in a container.
// - Creates the system container where all non-containerized processes run.
Start(*v1.Node, ActivePodsFunc) error
Start(*v1.Node, ActivePodsFunc, status.PodStatusProvider, internalapi.RuntimeService) error

// Returns resources allocated to system cgroups in the machine.
// These cgroups include the system and Kubernetes services.
Expand Down Expand Up @@ -71,6 +75,8 @@ type ContainerManager interface {
// Returns RunContainerOptions with devices, mounts, and env fields populated for
// extended resources required by container.
GetResources(pod *v1.Pod, container *v1.Container, activePods []*v1.Pod) (*kubecontainer.RunContainerOptions, error)

InternalContainerLifecycle() InternalContainerLifecycle
}

type NodeConfig struct {
Expand All @@ -83,7 +89,9 @@ type NodeConfig struct {
CgroupDriver string
ProtectKernelDefaults bool
NodeAllocatableConfig
ExperimentalQOSReserved map[v1.ResourceName]int64
ExperimentalQOSReserved map[v1.ResourceName]int64
ExperimentalCPUManagerPolicy string
ExperimentalCPUManagerReconcilePeriod time.Duration
}

type NodeAllocatableConfig struct {
Expand Down
43 changes: 39 additions & 4 deletions pkg/kubelet/cm/container_manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,22 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"

"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
kubefeatures "k8s.io/kubernetes/pkg/features"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/status"
utilfile "k8s.io/kubernetes/pkg/util/file"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/util/oom"
Expand Down Expand Up @@ -120,6 +126,8 @@ type containerManagerImpl struct {
qosContainerManager QOSContainerManager
// Interface for exporting and allocating devices reported by device plugins.
devicePluginHandler DevicePluginHandler
// Interface for CPU affinity management.
cpuManager cpumanager.Manager
}

type features struct {
Expand Down Expand Up @@ -219,11 +227,11 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
// It is safe to invoke `MachineInfo` on cAdvisor before logically initializing cAdvisor here because
// machine info is computed and cached once as part of cAdvisor object creation.
// But `RootFsInfo` and `ImagesFsInfo` are not available at this moment so they will be called later during manager starts
if info, err := cadvisorInterface.MachineInfo(); err == nil {
capacity = cadvisor.CapacityFromMachineInfo(info)
} else {
machineInfo, err := cadvisorInterface.MachineInfo()
if err != nil {
return nil, err
}
capacity = cadvisor.CapacityFromMachineInfo(machineInfo)

cgroupRoot := nodeConfig.CgroupRoot
cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
Expand Down Expand Up @@ -287,6 +295,20 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
return nil, err
}

// Initialize CPU manager
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManager) {
cm.cpuManager, err = cpumanager.NewManager(
nodeConfig.ExperimentalCPUManagerPolicy,
nodeConfig.ExperimentalCPUManagerReconcilePeriod,
machineInfo,
cm.GetNodeAllocatableReservation(),
)
if err != nil {
glog.Errorf("failed to initialize cpu manager: %v", err)
return nil, err
}
}

return cm, nil
}

Expand All @@ -306,6 +328,10 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
}
}

func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
return &internalContainerLifecycleImpl{cm.cpuManager}
}

// Create a cgroup container manager.
func createManager(containerName string) *fs.Manager {
allowAllDevices := true
Expand Down Expand Up @@ -512,7 +538,16 @@ func (cm *containerManagerImpl) Status() Status {
return cm.status
}

func (cm *containerManagerImpl) Start(node *v1.Node, activePods ActivePodsFunc) error {
func (cm *containerManagerImpl) Start(node *v1.Node,
activePods ActivePodsFunc,
podStatusProvider status.PodStatusProvider,
runtimeService internalapi.RuntimeService) error {

// Initialize CPU manager
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManager) {
cm.cpuManager.Start(cpumanager.ActivePodsFunc(activePods), podStatusProvider, runtimeService)
}

// cache the node Info including resource capacity and
// allocatable of the node
cm.nodeInfo = node
Expand Down
9 changes: 8 additions & 1 deletion pkg/kubelet/cm/container_manager_stub.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@ import (
"github.com/golang/glog"
"k8s.io/api/core/v1"

internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/status"
)

type containerManagerStub struct{}

var _ ContainerManager = &containerManagerStub{}

func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc) error {
func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
glog.V(2).Infof("Starting stub container manager")
return nil
}
Expand Down Expand Up @@ -72,6 +75,10 @@ func (cm *containerManagerStub) GetResources(pod *v1.Pod, container *v1.Containe
return &kubecontainer.RunContainerOptions{}, nil
}

func (cm *containerManagerStub) InternalContainerLifecycle() InternalContainerLifecycle {
return &internalContainerLifecycleImpl{cpumanager.NewFakeManager()}
}

func NewStubContainerManager() ContainerManager {
return &containerManagerStub{}
}
9 changes: 8 additions & 1 deletion pkg/kubelet/cm/container_manager_unsupported.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ import (

"k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/util/mount"
)

Expand All @@ -33,7 +36,7 @@ type unsupportedContainerManager struct {

var _ ContainerManager = &unsupportedContainerManager{}

func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc) error {
func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
return fmt.Errorf("Container Manager is unsupported in this build")
}

Expand Down Expand Up @@ -77,6 +80,10 @@ func (cm *unsupportedContainerManager) GetResources(pod *v1.Pod, container *v1.C
return &kubecontainer.RunContainerOptions{}, nil
}

func (cm *unsupportedContainerManager) InternalContainerLifecycle() InternalContainerLifecycle {
return &internalContainerLifecycleImpl{cpumanager.NewFakeManager()}
}

func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool, devicePluginEnabled bool, recorder record.EventRecorder) (ContainerManager, error) {
return &unsupportedContainerManager{}, nil
}
4 changes: 3 additions & 1 deletion pkg/kubelet/cm/container_manager_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ import (

"k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/util/mount"
)

Expand All @@ -33,7 +35,7 @@ type containerManagerImpl struct {

var _ ContainerManager = &containerManagerImpl{}

func (cm *containerManagerImpl) Start(_ *v1.Node, _ ActivePodsFunc) error {
func (cm *containerManagerImpl) Start(_ *v1.Node, _ ActivePodsFunc, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
glog.V(2).Infof("Starting Windows stub container manager")
return nil
}
Expand Down
Loading