Deviceplugin jiayingz #51209
pkg/kubelet/cm/container_manager_linux.go
@@ -41,6 +41,7 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
 	cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
+	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/qos"
 	utilfile "k8s.io/kubernetes/pkg/util/file"
 	"k8s.io/kubernetes/pkg/util/mount"
@@ -117,6 +118,8 @@ type containerManagerImpl struct {
 	recorder record.EventRecorder
 	// Interface for QoS cgroup management
 	qosContainerManager QOSContainerManager
+	// Interface for exporting and allocating devices reported by device plugins.
+	devicePluginHandler DevicePluginHandler
 }

 type features struct {
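The DevicePluginHandler interface and its implementations are defined elsewhere in this PR and do not appear in this excerpt. From the call sites below (NewDevicePluginHandlerImpl taking a capacity-update callback, Start() in containerManagerImpl.Start, and Allocate() in GetResources), its surface is roughly the following sketch; every type and field name here other than DevicePluginHandler itself is an assumption, not the merged API:

// Sketch only: inferred from this diff's call sites. DeviceSpec, MountSpec,
// ContainerRuntimeSpec, and AllocateResponse are hypothetical stand-ins for
// the device plugin API types.
package cm

import v1 "k8s.io/api/core/v1"

type DeviceSpec struct {
	HostPath, ContainerPath, Permissions string // accessed as dev.* in GetResources
}

type MountSpec struct {
	HostPath, ContainerPath string
	ReadOnly                bool
}

type ContainerRuntimeSpec struct {
	Devices []DeviceSpec      // device nodes to expose to the container
	Mounts  []MountSpec       // host paths to bind-mount into the container
	Envs    map[string]string // environment variables to inject
}

type AllocateResponse struct {
	Spec []ContainerRuntimeSpec // iterated as resp.Spec in GetResources
}

// DevicePluginHandler mediates between the kubelet and registered device plugins.
type DevicePluginHandler interface {
	// Start brings up the plugin registration/allocation machinery.
	Start() error
	// Allocate returns one response per extended resource the container
	// requests, reconciled against the currently active pods.
	Allocate(pod *v1.Pod, container *v1.Container, activePods []*v1.Pod) ([]*AllocateResponse, error)
}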
@@ -179,7 +182,7 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
 // TODO(vmarmol): Add limits to the system containers.
 // Takes the absolute name of the specified containers.
 // Empty container name disables use of the specified container.
-func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
+func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, devicePluginEnabled bool, recorder record.EventRecorder) (ContainerManager, error) {
 	subsystems, err := GetCgroupSubsystems()
 	if err != nil {
 		return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
@@ -250,7 +253,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 		return nil, err
 	}

-	return &containerManagerImpl{
+	cm := &containerManagerImpl{
 		cadvisorInterface:   cadvisorInterface,
 		mountUtil:           mountUtil,
 		NodeConfig:          nodeConfig,
@@ -260,7 +263,31 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 		cgroupRoot:          cgroupRoot,
 		recorder:            recorder,
 		qosContainerManager: qosContainerManager,
-	}, nil
+	}
+
+	updateDeviceCapacityFunc := func(updates v1.ResourceList) {
+		cm.Lock()
+		defer cm.Unlock()
+		for k, v := range updates {
+			if v.Value() <= 0 {
+				delete(cm.capacity, k)
+			} else {
+				cm.capacity[k] = v
+			}
+		}
+	}
+
+	glog.Infof("Creating device plugin handler: %t", devicePluginEnabled)
+	if devicePluginEnabled {
+		cm.devicePluginHandler, err = NewDevicePluginHandlerImpl(updateDeviceCapacityFunc)
+	} else {
+		cm.devicePluginHandler, err = NewDevicePluginHandlerStub()
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	return cm, nil
 }

 // NewPodContainerManager is a factory method returns a PodContainerManager object
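The update rule in updateDeviceCapacityFunc is worth noting: a non-positive quantity deletes the resource from node capacity entirely (a device plugin that goes away should stop being advertised), while a positive one overwrites it. A minimal standalone sketch of that behavior, assuming standard k8s.io/api and apimachinery types; the resource name is hypothetical:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	capacity := v1.ResourceList{}

	// Same update rule as updateDeviceCapacityFunc above (minus the lock).
	update := func(updates v1.ResourceList) {
		for k, v := range updates {
			if v.Value() <= 0 {
				delete(capacity, k)
			} else {
				capacity[k] = v
			}
		}
	}

	gpu := v1.ResourceName("nvidia.com/gpu") // hypothetical extended resource

	// A plugin registers two devices: capacity gains the new resource.
	update(v1.ResourceList{gpu: resource.MustParse("2")})
	q := capacity[gpu]
	fmt.Println(q.String()) // "2"

	// The plugin reports zero devices: the entry is deleted, not zeroed.
	update(v1.ResourceList{gpu: resource.MustParse("0")})
	_, ok := capacity[gpu]
	fmt.Println(ok) // false
}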
@@ -543,6 +570,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node, activePods ActivePodsFunc)
 		}
 		close(stopChan)
 	}, time.Second, stopChan)
+
+	// Starts device plugin manager.
+	if err := cm.devicePluginHandler.Start(); err != nil {
+		return err
+	}
 	return nil
 }
@@ -560,6 +592,76 @@ func (cm *containerManagerImpl) setFsCapacity() error {
 	return nil
 }

+// TODO: move the GetResources logic to PodContainerManager.
+func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Container, activePods []*v1.Pod) (*kubecontainer.RunContainerOptions, error) {
+	opts := &kubecontainer.RunContainerOptions{}
+	// Gets devices, mounts, and envs from device plugin handler.
+	glog.V(3).Infof("Calling devicePluginHandler AllocateDevices")
+	// Maps to detect duplicate settings.
+	devsMap := make(map[string]string)
+	mountsMap := make(map[string]string)
+	envsMap := make(map[string]string)
+	allocResps, err := cm.devicePluginHandler.Allocate(pod, container, activePods)

Review thread on the Allocate call above:

> Ideally, allocation of resources would happen as part of pod creation, so that a pod whose extended resource request cannot be satisfied by the kubelet for any reason fails during admission.

> Agreed, that sounds like a more ideal handling; it will probably be addressed after 1.8, when we try to move the GetResources logic to PodContainerManager.

> Can you file an issue with the background information, just so it doesn't get lost? You can assign the issue to yourself since you plan on fixing it.

> Filed #51592

> +1 on what @vishh said.

+	if err != nil {
+		return opts, err
+	}
+	// Loops through AllocationResponses of all required extended resources.
+	for _, resp := range allocResps {
+		// Loops through runtime spec of all devices of the given resource.
+		for _, devRuntime := range resp.Spec {
+			// Updates RunContainerOptions.Devices.
+			for _, dev := range devRuntime.Devices {
+				if d, ok := devsMap[dev.ContainerPath]; ok {
+					glog.V(3).Infof("skip existing device %s %s", dev.ContainerPath, dev.HostPath)
+					if d != dev.HostPath {
+						glog.Errorf("Container device %s has conflicting mapping host devices: %s and %s",
+							dev.ContainerPath, d, dev.HostPath)
+					}
+					continue
+				}
+				devsMap[dev.ContainerPath] = dev.HostPath
+				opts.Devices = append(opts.Devices, kubecontainer.DeviceInfo{
+					PathOnHost:      dev.HostPath,
+					PathInContainer: dev.ContainerPath,
+					Permissions:     dev.Permissions,
+				})
+			}
+			// Updates RunContainerOptions.Mounts.
+			for _, mount := range devRuntime.Mounts {
+				if m, ok := mountsMap[mount.ContainerPath]; ok {
+					glog.V(3).Infof("skip existing mount %s %s", mount.ContainerPath, mount.HostPath)
+					if m != mount.HostPath {
+						glog.Errorf("Container mount %s has conflicting mapping host mounts: %s and %s",
+							mount.ContainerPath, m, mount.HostPath)
+					}
+					continue
+				}
+				mountsMap[mount.ContainerPath] = mount.HostPath
+				opts.Mounts = append(opts.Mounts, kubecontainer.Mount{
+					Name:           mount.ContainerPath,
+					ContainerPath:  mount.ContainerPath,
+					HostPath:       mount.HostPath,
+					ReadOnly:       mount.ReadOnly,
+					SELinuxRelabel: false,
+				})
+			}
+			// Updates RunContainerOptions.Envs.
+			for k, v := range devRuntime.Envs {
+				if e, ok := envsMap[k]; ok {
+					glog.V(3).Infof("skip existing envs %s %s", k, v)
+					if e != v {
+						glog.Errorf("Environment variable %s has conflicting setting: %s and %s", k, e, v)
+					}
+					continue
+				}
+				envsMap[k] = v
+				opts.Envs = append(opts.Envs, kubecontainer.EnvVar{Name: k, Value: v})
+			}
+		}
+	}
+	return opts, nil
+}

 func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {
 	cpuLimit := int64(0)
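The dedup logic in GetResources is first-wins, keyed on ContainerPath: a later, conflicting host mapping is logged as an error but otherwise ignored rather than failing the container. A minimal standalone sketch of that pattern (the deviceMapping type and dedupFirstWins helper are hypothetical illustrations, not kubelet code):

package main

import "log"

// deviceMapping pairs a container path with the host path backing it.
type deviceMapping struct {
	ContainerPath, HostPath string
}

// dedupFirstWins keeps the first mapping seen for each ContainerPath and
// logs, but does not fail on, later conflicting HostPaths, mirroring the
// GetResources behavior above.
func dedupFirstWins(devs []deviceMapping) []deviceMapping {
	seen := make(map[string]string)
	var out []deviceMapping
	for _, d := range devs {
		if prev, ok := seen[d.ContainerPath]; ok {
			if prev != d.HostPath {
				log.Printf("conflicting host devices for %s: %s and %s",
					d.ContainerPath, prev, d.HostPath)
			}
			continue
		}
		seen[d.ContainerPath] = d.HostPath
		out = append(out, d)
	}
	return out
}

func main() {
	devs := []deviceMapping{
		{"/dev/nvidia0", "/dev/nvidia0"},
		{"/dev/nvidia0", "/dev/nvidia1"}, // conflict: logged, then dropped
		{"/dev/nvidiactl", "/dev/nvidiactl"},
	}
	log.Println(dedupFirstWins(devs)) // keeps the first nvidia0 mapping
}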
pkg/kubelet/cm/container_manager_unsupported.go
@@ -72,6 +72,10 @@ func (cm *unsupportedContainerManager) NewPodContainerManager() PodContainerMana
 	return &unsupportedPodContainerManager{}
 }

Review thread:

> Need to add GetResources here too.

> done.
-func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
+func (cm *unsupportedContainerManager) GetResources(pod *v1.Pod, container *v1.Container, activePods []*v1.Pod) (*kubecontainer.RunContainerOptions, error) {
+	return &kubecontainer.RunContainerOptions{}, nil
+}
+
+func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool, devicePluginEnabled bool, recorder record.EventRecorder) (ContainerManager, error) {
 	return &unsupportedContainerManager{}, nil
 }
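The call site that supplies the new devicePluginEnabled argument is not part of this excerpt. A plausible sketch of how a caller would derive it from a kubelet feature gate; the gate name "DevicePlugins" and the helper are assumptions, not necessarily this PR's exact wiring:

package main

import (
	utilfeature "k8s.io/apiserver/pkg/util/feature"
)

// devicePluginsEnabled reports whether the (assumed) DevicePlugins alpha
// feature gate is on; the result would be passed as the new boolean
// parameter of NewContainerManager.
func devicePluginsEnabled() bool {
	return utilfeature.DefaultFeatureGate.Enabled("DevicePlugins")
}

func main() {
	println(devicePluginsEnabled())
}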
Review thread on the glog.Infof("Creating device plugin handler: ...") call:

> Should a log level be used?

> This is a one-time log, so probably ok to stay as info.