Monitor the /kubepods cgroup for allocatable metrics #57802

Merged: 1 commit, Feb 19, 2018
2 changes: 2 additions & 0 deletions pkg/kubelet/apis/stats/v1alpha1/types.go
@@ -87,6 +87,8 @@ const (
 	SystemContainerRuntime = "runtime"
 	// SystemContainerMisc is the container name for the system container tracking non-kubernetes processes.
 	SystemContainerMisc = "misc"
+	// SystemContainerPods is the container name for the system container tracking user pods.
+	SystemContainerPods = "pods"
 )

 // PodStats holds pod-level unprocessed sample stats.
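With this constant the node summary can expose a "pods" system container, tracking the /kubepods cgroup, next to the existing "runtime" and "misc" entries. A minimal sketch of what a consumer of the summary might see; the structs below are pared-down stand-ins for the v1alpha1 types, not the real definitions:

```go
package main

import "fmt"

// Pared-down stand-ins for the summary API types; the real definitions live in
// pkg/kubelet/apis/stats/v1alpha1 and carry full CPU and memory stats.
type ContainerStats struct {
	Name string
}

type NodeStats struct {
	SystemContainers []ContainerStats
}

func main() {
	// With this change the kubelet can report a "pods" system container that
	// tracks the /kubepods cgroup in addition to "runtime" and "misc".
	node := NodeStats{
		SystemContainers: []ContainerStats{
			{Name: "runtime"},
			{Name: "misc"},
			{Name: "pods"}, // SystemContainerPods
		},
	}
	for _, c := range node.SystemContainers {
		fmt.Println("system container:", c.Name)
	}
}
```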
3 changes: 3 additions & 0 deletions pkg/kubelet/cm/container_manager.go
@@ -91,6 +91,9 @@ type ContainerManager interface {
 	UpdatePluginResources(*schedulercache.NodeInfo, *lifecycle.PodAdmitAttributes) error

 	InternalContainerLifecycle() InternalContainerLifecycle
+
+	// GetPodCgroupRoot returns the cgroup which contains all pods.
+	GetPodCgroupRoot() string
 }

 type NodeConfig struct {
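A stats consumer that only needs to know where the pod cgroup lives can depend on this single method. A rough sketch of such a caller; the small interface and the fake implementation are illustrative, not the actual kubelet wiring:

```go
package main

import "fmt"

// podCgroupRooter is a minimal stand-in for the slice of the ContainerManager
// interface a stats consumer would need.
type podCgroupRooter interface {
	GetPodCgroupRoot() string
}

// fakeManager is a placeholder implementation for this sketch only.
type fakeManager struct{}

// "/kubepods" is an illustrative value, not read from a real node.
func (fakeManager) GetPodCgroupRoot() string { return "/kubepods" }

func main() {
	var cm podCgroupRooter = fakeManager{}
	// A stats provider could ask the container manager where all pods live and
	// request cgroup stats for that path, e.g. from cAdvisor.
	fmt.Println("pod cgroup root:", cm.GetPodCgroupRoot())
}
```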
5 changes: 5 additions & 0 deletions pkg/kubelet/cm/container_manager_linux.go
@@ -500,6 +500,11 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
 	return cm.NodeConfig
 }

+// GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods.
+func (cm *containerManagerImpl) GetPodCgroupRoot() string {
+	return cm.cgroupManager.Name(CgroupName(cm.cgroupRoot))
+}
+
 func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems {
 	return cm.subsystems
 }
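The value returned here is the literal cgroupfs name, so what callers see depends on the configured cgroup driver: the cgroupfs driver typically yields a path like /kubepods, while the systemd driver maps the same cgroup to a slice such as kubepods.slice (both values are common defaults, not taken from this diff). A sketch of turning that value into a cgroup v1 file path for a memory counter; the helper is hypothetical and the mount point follows the conventional cgroup v1 layout:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// podsMemoryUsagePath builds the cgroup v1 path for the memory usage counter of
// the pod cgroup root, e.g. /sys/fs/cgroup/memory/kubepods/memory.usage_in_bytes.
// The mount point and file name are standard cgroup v1 conventions, not kubelet API.
func podsMemoryUsagePath(podCgroupRoot string) string {
	return filepath.Join("/sys/fs/cgroup/memory", strings.TrimPrefix(podCgroupRoot, "/"), "memory.usage_in_bytes")
}

func main() {
	// "/kubepods" stands in for what GetPodCgroupRoot might return with the cgroupfs driver.
	path := podsMemoryUsagePath("/kubepods")
	data, err := os.ReadFile(path)
	if err != nil {
		fmt.Println("could not read pod cgroup memory usage:", err)
		return
	}
	fmt.Println("pods memory usage (bytes):", strings.TrimSpace(string(data)))
}
```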
4 changes: 4 additions & 0 deletions pkg/kubelet/cm/container_manager_stub.go
@@ -90,6 +90,10 @@ func (cm *containerManagerStub) InternalContainerLifecycle() InternalContainerLi
 	return &internalContainerLifecycleImpl{cpumanager.NewFakeManager()}
 }

+func (cm *containerManagerStub) GetPodCgroupRoot() string {
+	return ""
+}
+
 func NewStubContainerManager() ContainerManager {
 	return &containerManagerStub{}
 }
18 changes: 9 additions & 9 deletions pkg/kubelet/eviction/eviction_manager.go
@@ -149,11 +149,11 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
 }

 // Start starts the control loop to observe and response to low compute resources.
-func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration) {
+func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
 	// start the eviction manager monitoring
 	go func() {
 		for {
-			if evictedPods := m.synchronize(diskInfoProvider, podFunc, capacityProvider); evictedPods != nil {
+			if evictedPods := m.synchronize(diskInfoProvider, podFunc); evictedPods != nil {
 				glog.Infof("eviction manager: pods %s evicted, waiting for pod to be cleaned up", format.Pods(evictedPods))
 				m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
 			} else {
@@ -219,7 +219,7 @@ func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observatio

 // synchronize is the main control loop that enforces eviction thresholds.
 // Returns the pod that was killed, or nil if no pod was killed.
-func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, capacityProvider CapacityProvider) []*v1.Pod {
+func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
 	// if we have nothing to do, just return
 	thresholds := m.config.Thresholds
 	if len(thresholds) == 0 {
@@ -248,7 +248,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	}

 	// make observations and get a function to derive pod usage stats relative to those observations.
-	observations, statsFunc := makeSignalObservations(summary, capacityProvider, activePods)
+	observations, statsFunc := makeSignalObservations(summary)
 	debugLogObservations("observations", observations)

 	// attempt to create a threshold notifier to improve eviction response time
@@ -259,15 +259,15 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
 			glog.Infof("soft memory eviction threshold crossed at %s", desc)
 			// TODO wait grace period for soft memory limit
-			m.synchronize(diskInfoProvider, podFunc, capacityProvider)
+			m.synchronize(diskInfoProvider, podFunc)
 		})
 		if err != nil {
 			glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
 		}
 		// start hard memory notification
 		err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) {
 			glog.Infof("hard memory eviction threshold crossed at %s", desc)
-			m.synchronize(diskInfoProvider, podFunc, capacityProvider)
+			m.synchronize(diskInfoProvider, podFunc)
 		})
 		if err != nil {
 			glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
@@ -349,7 +349,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)

 	// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
-	if m.reclaimNodeLevelResources(resourceToReclaim, capacityProvider, activePods) {
+	if m.reclaimNodeLevelResources(resourceToReclaim) {
 		glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
 		return nil
 	}
@@ -437,7 +437,7 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
 }

 // reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
-func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, capacityProvider CapacityProvider, pods []*v1.Pod) bool {
+func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName) bool {
 	nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
 	for _, nodeReclaimFunc := range nodeReclaimFuncs {
 		// attempt to reclaim the pressured resource.
@@ -454,7 +454,7 @@ func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceNam
 		}

 		// make observations and get a function to derive pod usage stats relative to those observations.
-		observations, _ := makeSignalObservations(summary, capacityProvider, pods)
+		observations, _ := makeSignalObservations(summary)
 		debugLogObservations("observations after resource reclaim", observations)

 		// determine the set of thresholds met independent of grace period
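Together with the Start and synchronize signature changes above, the eviction manager no longer threads a CapacityProvider through its control loop; as the PR title suggests, allocatable usage can instead be observed from the new "pods" system container in the stats summary. A rough sketch of that idea with simplified stand-in types, not the real makeSignalObservations:

```go
package main

import "fmt"

// Simplified stand-ins for the summary types consumed by the eviction manager;
// the real definitions live in pkg/kubelet/apis/stats/v1alpha1.
type MemoryStats struct {
	AvailableBytes  *uint64
	WorkingSetBytes *uint64
}

type ContainerStats struct {
	Name   string
	Memory *MemoryStats
}

type Summary struct {
	SystemContainers []ContainerStats
}

// allocatableMemoryObservation illustrates the idea behind the signature change:
// rather than combining a CapacityProvider with per-pod stats, the allocatable
// memory signal can be read straight from the "pods" system container, which
// tracks the /kubepods cgroup. A sketch of the approach, not the kubelet code.
func allocatableMemoryObservation(s Summary) (available, workingSet uint64, ok bool) {
	for _, c := range s.SystemContainers {
		if c.Name == "pods" && c.Memory != nil && c.Memory.AvailableBytes != nil && c.Memory.WorkingSetBytes != nil {
			return *c.Memory.AvailableBytes, *c.Memory.WorkingSetBytes, true
		}
	}
	return 0, 0, false
}

func main() {
	avail, ws := uint64(6<<30), uint64(2<<30)
	s := Summary{SystemContainers: []ContainerStats{
		{Name: "pods", Memory: &MemoryStats{AvailableBytes: &avail, WorkingSetBytes: &ws}},
	}}
	if a, w, ok := allocatableMemoryObservation(s); ok {
		fmt.Printf("allocatable memory: available=%d workingSet=%d\n", a, w)
	}
}
```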