diff --git a/hack/golangci-hints.yaml b/hack/golangci-hints.yaml
index 6b6b338664d94..28e54bd32fd9e 100644
--- a/hack/golangci-hints.yaml
+++ b/hack/golangci-hints.yaml
@@ -151,6 +151,7 @@ linters-settings: # please keep this alphabetized
       contextual k8s.io/kubernetes/pkg/controller/.*
       contextual k8s.io/kubernetes/pkg/scheduler/.*
       contextual k8s.io/kubernetes/test/e2e/dra/.*
+      contextual k8s.io/kubernetes/pkg/kubelet/cm/dra/.*
 
       # As long as contextual logging is alpha or beta, all WithName, WithValues,
       # NewContext calls have to go through klog. Once it is GA, we can lift
diff --git a/hack/golangci-strict.yaml b/hack/golangci-strict.yaml
index 7a6704f43fee6..751a689bb395e 100644
--- a/hack/golangci-strict.yaml
+++ b/hack/golangci-strict.yaml
@@ -197,6 +197,7 @@ linters-settings: # please keep this alphabetized
       contextual k8s.io/kubernetes/pkg/controller/.*
       contextual k8s.io/kubernetes/pkg/scheduler/.*
       contextual k8s.io/kubernetes/test/e2e/dra/.*
+      contextual k8s.io/kubernetes/pkg/kubelet/cm/dra/.*
 
       # As long as contextual logging is alpha or beta, all WithName, WithValues,
       # NewContext calls have to go through klog. Once it is GA, we can lift
diff --git a/hack/golangci.yaml b/hack/golangci.yaml
index 5c29d7f5f25e3..b9225b2aee5e8 100644
--- a/hack/golangci.yaml
+++ b/hack/golangci.yaml
@@ -200,6 +200,7 @@ linters-settings: # please keep this alphabetized
       contextual k8s.io/kubernetes/pkg/controller/.*
       contextual k8s.io/kubernetes/pkg/scheduler/.*
       contextual k8s.io/kubernetes/test/e2e/dra/.*
+      contextual k8s.io/kubernetes/pkg/kubelet/cm/dra/.*
 
       # As long as contextual logging is alpha or beta, all WithName, WithValues,
       # NewContext calls have to go through klog. Once it is GA, we can lift
diff --git a/hack/logcheck.conf b/hack/logcheck.conf
index c42245a49aa05..1ba8edd315f52 100644
--- a/hack/logcheck.conf
+++ b/hack/logcheck.conf
@@ -47,6 +47,7 @@ contextual k8s.io/kubernetes/cmd/kube-scheduler/.*
 contextual k8s.io/kubernetes/pkg/controller/.*
 contextual k8s.io/kubernetes/pkg/scheduler/.*
 contextual k8s.io/kubernetes/test/e2e/dra/.*
+contextual k8s.io/kubernetes/pkg/kubelet/cm/dra/.*
 
 # As long as contextual logging is alpha or beta, all WithName, WithValues,
 # NewContext calls have to go through klog. Once it is GA, we can lift
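The four linter/logcheck entries above opt `pkg/kubelet/cm/dra/` into contextual logging enforcement, which the code changes below then have to satisfy: no direct `klog.InfoS`/`klog.ErrorS` calls in that package, the logger is taken from the `context.Context` instead. A minimal, self-contained sketch of that pattern (toy function names, not kubelet code), assuming only `k8s.io/klog/v2`:

```go
package main

import (
	"context"

	"k8s.io/klog/v2"
)

// prepareClaim is a toy stand-in for a function in pkg/kubelet/cm/dra.
// Under the "contextual" logcheck rule it must not call klog.InfoS directly;
// instead it retrieves whatever logger the caller put into the context.
func prepareClaim(ctx context.Context, claim string) {
	logger := klog.FromContext(ctx)
	logger.V(3).Info("Processing resource claim", "claim", claim)
}

func main() {
	// The caller controls the logger: the name and any key/value pairs
	// attached here show up on every message logged by prepareClaim.
	logger := klog.Background().WithName("dra")
	ctx := klog.NewContext(context.Background(), logger)
	prepareClaim(ctx, "example-claim")
}
```

The payoff is that callers attach names and key/value pairs once and they appear on every message logged further down the call chain.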
diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go
index 99fb69c8b98d9..fb82e039366cf 100644
--- a/pkg/kubelet/cm/container_manager.go
+++ b/pkg/kubelet/cm/container_manager.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cm
 
 import (
+    "context"
     "fmt"
     "strconv"
     "strings"
@@ -55,7 +56,7 @@ type ContainerManager interface {
     // Runs the container manager's housekeeping.
     // - Ensures that the Docker daemon is in a container.
     // - Creates the system container where all non-containerized processes run.
-    Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error
+    Start(context.Context, *v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error
 
     // SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
     // These cgroups include the system and Kubernetes services.
@@ -94,7 +95,7 @@ type ContainerManager interface {
 
     // GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
     // extended resources required by container.
-    GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)
+    GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)
 
     // UpdatePluginResources calls Allocate of device plugin handler for potential
     // requests for device plugin resources, and returns an error if fails.
@@ -124,10 +125,10 @@ type ContainerManager interface {
     GetNodeAllocatableAbsolute() v1.ResourceList
 
     // PrepareDynamicResource prepares dynamic pod resources
-    PrepareDynamicResources(*v1.Pod) error
+    PrepareDynamicResources(context.Context, *v1.Pod) error
 
     // UnprepareDynamicResources unprepares dynamic pod resources
-    UnprepareDynamicResources(*v1.Pod) error
+    UnprepareDynamicResources(context.Context, *v1.Pod) error
 
     // PodMightNeedToUnprepareResources returns true if the pod with the given UID
     // might need to unprepare resources.
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
index 29be9d202b847..cc4d8468cca85 100644
--- a/pkg/kubelet/cm/container_manager_linux.go
+++ b/pkg/kubelet/cm/container_manager_linux.go
@@ -553,19 +553,18 @@ func (cm *containerManagerImpl) Status() Status {
     return cm.status
 }
 
-func (cm *containerManagerImpl) Start(node *v1.Node,
+func (cm *containerManagerImpl) Start(ctx context.Context, node *v1.Node,
     activePods ActivePodsFunc,
     sourcesReady config.SourcesReady,
     podStatusProvider status.PodStatusProvider,
     runtimeService internalapi.RuntimeService,
     localStorageCapacityIsolation bool) error {
-    ctx := context.Background()
 
     containerMap, containerRunningSet := buildContainerMapAndRunningSetFromRuntime(ctx, runtimeService)
 
     // Initialize DRA manager
     if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DynamicResourceAllocation) {
-        err := cm.draManager.Start(dra.ActivePodsFunc(activePods), sourcesReady)
+        err := cm.draManager.Start(ctx, dra.ActivePodsFunc(activePods), sourcesReady)
         if err != nil {
             return fmt.Errorf("start dra manager error: %w", err)
         }
@@ -655,13 +654,15 @@ func (cm *containerManagerImpl) GetPluginRegistrationHandler() cache.PluginHandl
 }
 
 // TODO: move the GetResources logic to PodContainerManager.
-func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
+func (cm *containerManagerImpl) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
+    logger := klog.FromContext(ctx)
     opts := &kubecontainer.RunContainerOptions{}
     if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DynamicResourceAllocation) {
         resOpts, err := cm.draManager.GetResources(pod, container)
         if err != nil {
             return nil, err
         }
+        logger.V(5).Info("Determined CDI devices for pod", "pod", klog.KObj(pod), "cdiDevices", resOpts.CDIDevices)
         opts.CDIDevices = append(opts.CDIDevices, resOpts.CDIDevices...)
     }
     // Allocate should already be called during predicateAdmitHandler.Admit(),
@@ -1017,12 +1018,12 @@ func containerMemoryFromBlock(blocks []memorymanagerstate.Block) []*podresources
     return containerMemories
 }
 
-func (cm *containerManagerImpl) PrepareDynamicResources(pod *v1.Pod) error {
-    return cm.draManager.PrepareResources(pod)
+func (cm *containerManagerImpl) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
+    return cm.draManager.PrepareResources(ctx, pod)
 }
 
-func (cm *containerManagerImpl) UnprepareDynamicResources(pod *v1.Pod) error {
-    return cm.draManager.UnprepareResources(pod)
+func (cm *containerManagerImpl) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
+    return cm.draManager.UnprepareResources(ctx, pod)
 }
 
 func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool {
diff --git a/pkg/kubelet/cm/container_manager_stub.go b/pkg/kubelet/cm/container_manager_stub.go
index 6f849b67e52a1..35aeee3672437 100644
--- a/pkg/kubelet/cm/container_manager_stub.go
+++ b/pkg/kubelet/cm/container_manager_stub.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cm
 
 import (
+    "context"
     "fmt"
 
     v1 "k8s.io/api/core/v1"
@@ -45,7 +46,7 @@ type containerManagerStub struct {
 
 var _ ContainerManager = &containerManagerStub{}
 
-func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
+func (cm *containerManagerStub) Start(_ context.Context, _ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
     klog.V(2).InfoS("Starting stub container manager")
     return nil
 }
@@ -110,7 +111,7 @@ func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
     return &podContainerManagerStub{}
 }
 
-func (cm *containerManagerStub) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
+func (cm *containerManagerStub) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
     return &kubecontainer.RunContainerOptions{}, nil
 }
 
@@ -170,11 +171,11 @@ func (cm *containerManagerStub) GetNodeAllocatableAbsolute() v1.ResourceList {
     return nil
 }
 
-func (cm *containerManagerStub) PrepareDynamicResources(pod *v1.Pod) error {
+func (cm *containerManagerStub) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
 
-func (cm *containerManagerStub) UnprepareDynamicResources(*v1.Pod) error {
+func (cm *containerManagerStub) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
 
diff --git a/pkg/kubelet/cm/container_manager_unsupported.go b/pkg/kubelet/cm/container_manager_unsupported.go
index ba4f9a1b6ad4a..a384228d31a1c 100644
--- a/pkg/kubelet/cm/container_manager_unsupported.go
+++ b/pkg/kubelet/cm/container_manager_unsupported.go
@@ -20,6 +20,7 @@ limitations under the License.
 package cm
 
 import (
+    "context"
     "fmt"
 
     "k8s.io/mount-utils"
@@ -39,7 +40,7 @@ type unsupportedContainerManager struct {
 
 var _ ContainerManager = &unsupportedContainerManager{}
 
-func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
+func (unsupportedContainerManager) Start(_ context.Context, _ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
     return fmt.Errorf("Container Manager is unsupported in this build")
 }
 
diff --git a/pkg/kubelet/cm/container_manager_windows.go b/pkg/kubelet/cm/container_manager_windows.go
index 79a308d6047c5..232d121223a6a 100644
--- a/pkg/kubelet/cm/container_manager_windows.go
+++ b/pkg/kubelet/cm/container_manager_windows.go
@@ -70,7 +70,7 @@ func (ra *noopWindowsResourceAllocator) Admit(attrs *lifecycle.PodAdmitAttribute
     return admission.GetPodAdmitResult(nil)
 }
 
-func (cm *containerManagerImpl) Start(node *v1.Node,
+func (cm *containerManagerImpl) Start(ctx context.Context, node *v1.Node,
     activePods ActivePodsFunc,
     sourcesReady config.SourcesReady,
     podStatusProvider status.PodStatusProvider,
@@ -88,7 +88,6 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
         }
     }
 
-    ctx := context.Background()
     containerMap, containerRunningSet := buildContainerMapAndRunningSetFromRuntime(ctx, runtimeService)
 
     // Starts device manager.
@@ -189,7 +188,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
     return &podContainerManagerStub{}
 }
 
-func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
+func (cm *containerManagerImpl) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
     opts := &kubecontainer.RunContainerOptions{}
     // Allocate should already be called during predicateAdmitHandler.Admit(),
     // just try to fetch device runtime information from cached state here
@@ -275,11 +274,11 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
     return nil
 }
 
-func (cm *containerManagerImpl) PrepareDynamicResources(pod *v1.Pod) error {
+func (cm *containerManagerImpl) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
 
-func (cm *containerManagerImpl) UnprepareDynamicResources(*v1.Pod) error {
+func (cm *containerManagerImpl) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
 
diff --git a/pkg/kubelet/cm/dra/manager.go b/pkg/kubelet/cm/dra/manager.go
index bd8416003fc1a..ab8ad8c7e5d44 100644
--- a/pkg/kubelet/cm/dra/manager.go
+++ b/pkg/kubelet/cm/dra/manager.go
@@ -68,8 +68,6 @@ type ManagerImpl struct {
 // NewManagerImpl creates a new manager.
 func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, nodeName types.NodeName) (*ManagerImpl, error) {
-    klog.V(2).InfoS("Creating DRA manager")
-
     claimInfoCache, err := newClaimInfoCache(stateFileDirectory, draManagerStateFileName)
     if err != nil {
         return nil, fmt.Errorf("failed to create claimInfo cache: %+v", err)
     }
@@ -91,15 +89,16 @@ func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string, n
 }
 
 // Start starts the reconcile loop of the manager.
-func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error {
+func (m *ManagerImpl) Start(ctx context.Context, activePods ActivePodsFunc, sourcesReady config.SourcesReady) error {
     m.activePods = activePods
     m.sourcesReady = sourcesReady
-    go wait.Until(func() { m.reconcileLoop() }, m.reconcilePeriod, wait.NeverStop)
+    go wait.UntilWithContext(ctx, func(ctx context.Context) { m.reconcileLoop(ctx) }, m.reconcilePeriod)
     return nil
 }
 
 // reconcileLoop ensures that any stale state in the manager's claimInfoCache gets periodically reconciled.
-func (m *ManagerImpl) reconcileLoop() {
+func (m *ManagerImpl) reconcileLoop(ctx context.Context) {
+    logger := klog.FromContext(ctx)
     // Only once all sources are ready do we attempt to reconcile.
     // This ensures that the call to m.activePods() below will succeed with
     // the actual active pods list.
@@ -140,8 +139,8 @@ func (m *ManagerImpl) reconcileLoop() {
 
     // Loop through all inactive pods and call UnprepareResources on them.
     for _, podClaims := range inactivePodClaims {
-        if err := m.unprepareResources(podClaims.uid, podClaims.namespace, podClaims.claimNames); err != nil {
-            klog.ErrorS(err, "Unpreparing pod resources in reconcile loop", "podUID", podClaims.uid)
+        if err := m.unprepareResources(ctx, podClaims.uid, podClaims.namespace, podClaims.claimNames); err != nil {
+            logger.Info("Unpreparing pod resources in reconcile loop failed, will retry", "podUID", podClaims.uid, "err", err)
         }
     }
 }
@@ -150,12 +149,13 @@ func (m *ManagerImpl) reconcileLoop() {
 // for the input container, issue NodePrepareResources rpc requests
 // for each new resource requirement, process their responses and update the cached
 // containerResources on success.
-func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
+func (m *ManagerImpl) PrepareResources(ctx context.Context, pod *v1.Pod) error {
+    logger := klog.FromContext(ctx)
     batches := make(map[string][]*drapb.Claim)
     resourceClaims := make(map[types.UID]*resourceapi.ResourceClaim)
     for i := range pod.Spec.ResourceClaims {
         podClaim := &pod.Spec.ResourceClaims[i]
-        klog.V(3).InfoS("Processing resource", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
+        logger.V(3).Info("Processing resource", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
         claimName, mustCheckOwner, err := resourceclaim.Name(pod, podClaim)
         if err != nil {
             return fmt.Errorf("prepare resource claim: %v", err)
@@ -163,12 +163,12 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
         if claimName == nil {
             // Nothing to do.
-            klog.V(5).InfoS("No need to prepare resources, no claim generated", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
+            logger.V(5).Info("No need to prepare resources, no claim generated", "pod", klog.KObj(pod), "podClaim", podClaim.Name)
             continue
         }
         // Query claim object from the API server
         resourceClaim, err := m.kubeClient.ResourceV1alpha3().ResourceClaims(pod.Namespace).Get(
-            context.TODO(),
+            ctx,
             *claimName,
             metav1.GetOptions{})
         if err != nil {
@@ -198,9 +198,9 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
                 return fmt.Errorf("claim %s: %w", klog.KObj(resourceClaim), err)
             }
             claimInfo = m.cache.add(ci)
-            klog.V(6).InfoS("Created new claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
+            logger.V(6).Info("Created new claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
         } else {
-            klog.V(6).InfoS("Found existing claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
+            logger.V(6).Info("Found existing claim info cache entry", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim), "claimInfoEntry", claimInfo)
         }
 
         // Add a reference to the current pod in the claim info.
@@ -216,7 +216,7 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 
         // If this claim is already prepared, there is no need to prepare it again.
         if claimInfo.isPrepared() {
-            klog.V(5).InfoS("Resources already prepared", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim))
+            logger.V(5).Info("Resources already prepared", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim))
             return nil
         }
 
@@ -250,7 +250,7 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
         if err != nil {
             return fmt.Errorf("failed to get gRPC client for driver %s: %w", driverName, err)
         }
-        response, err := client.NodePrepareResources(context.Background(), &drapb.NodePrepareResourcesRequest{Claims: claims})
+        response, err := client.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{Claims: claims})
         if err != nil {
             // General error unrelated to any particular claim.
             return fmt.Errorf("NodePrepareResources failed: %w", err)
@@ -338,7 +338,6 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
             // was generated for the referenced claim. There are valid use
             // cases when this might happen, so we simply skip it.
             if claimName == nil {
-                klog.V(5).InfoS("No CDI devices, no claim generated", "pod", klog.KObj(pod), "podClaimName", podClaim.Name)
                 continue
             }
             for _, claim := range container.Resources.Claims {
@@ -362,8 +361,6 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
             }
         }
     }
-
-    klog.V(5).InfoS("Determined CDI devices for pod", "pod", klog.KObj(pod), "cdiDevices", cdiDevices)
     return &ContainerInfo{CDIDevices: cdiDevices}, nil
 }
 
@@ -371,7 +368,7 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta
 // This function is idempotent and may be called multiple times against the same pod.
 // As such, calls to the underlying NodeUnprepareResource API are skipped for claims that have
 // already been successfully unprepared.
-func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
+func (m *ManagerImpl) UnprepareResources(ctx context.Context, pod *v1.Pod) error {
     var claimNames []string
     for i := range pod.Spec.ResourceClaims {
         claimName, _, err := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
@@ -386,10 +383,11 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error {
         }
         claimNames = append(claimNames, *claimName)
     }
-    return m.unprepareResources(pod.UID, pod.Namespace, claimNames)
+    return m.unprepareResources(ctx, pod.UID, pod.Namespace, claimNames)
 }
 
-func (m *ManagerImpl) unprepareResources(podUID types.UID, namespace string, claimNames []string) error {
+func (m *ManagerImpl) unprepareResources(ctx context.Context, podUID types.UID, namespace string, claimNames []string) error {
+    logger := klog.FromContext(ctx)
     batches := make(map[string][]*drapb.Claim)
     claimNamesMap := make(map[types.UID]string)
     for _, claimName := range claimNames {
@@ -445,7 +443,7 @@ func (m *ManagerImpl) unprepareResources(podUID types.UID, namespace string, cla
         if err != nil {
             return fmt.Errorf("get gRPC client for DRA driver %s: %w", driverName, err)
         }
-        response, err := client.NodeUnprepareResources(context.Background(), &drapb.NodeUnprepareResourcesRequest{Claims: claims})
+        response, err := client.NodeUnprepareResources(ctx, &drapb.NodeUnprepareResourcesRequest{Claims: claims})
         if err != nil {
             // General error unrelated to any particular claim.
             return fmt.Errorf("NodeUnprepareResources failed: %w", err)
@@ -473,7 +471,7 @@ func (m *ManagerImpl) unprepareResources(podUID types.UID, namespace string, cla
     for _, claimName := range claimNamesMap {
         claimInfo, _ := m.cache.get(claimName, namespace)
         m.cache.delete(claimName, namespace)
-        klog.V(6).InfoS("Deleted claim info cache entry", "claim", klog.KRef(namespace, claimName), "claimInfoEntry", claimInfo)
+        logger.V(6).Info("Deleted claim info cache entry", "claim", klog.KRef(namespace, claimName), "claimInfoEntry", claimInfo)
     }
 
     // Atomically sync the cache back to the checkpoint.
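Two things change structurally in `manager.go` above: the reconcile loop is driven by `wait.UntilWithContext` instead of `wait.Until(..., wait.NeverStop)`, and logging goes through `klog.FromContext(ctx)`. The toy program below (an illustration, not the kubelet manager) shows why that combination is useful: the loop inherits the caller's named logger and stops as soon as the context is canceled.

```go
package main

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/klog/v2"
)

// reconcile is a toy counterpart of ManagerImpl.reconcileLoop: it takes the
// context, so it inherits the caller's logger and observes cancellation.
func reconcile(ctx context.Context) {
	klog.FromContext(ctx).V(5).Info("Reconciling claim info cache")
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	ctx = klog.NewContext(ctx, klog.Background().WithName("dra-manager"))

	// wait.UntilWithContext replaces wait.Until(..., wait.NeverStop): the
	// goroutine now stops as soon as ctx is canceled instead of leaking.
	go wait.UntilWithContext(ctx, reconcile, time.Second)

	time.Sleep(2500 * time.Millisecond)
	cancel() // shuts the reconcile loop down
	time.Sleep(100 * time.Millisecond)
}
```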
diff --git a/pkg/kubelet/cm/dra/manager_test.go b/pkg/kubelet/cm/dra/manager_test.go
index dc8f26a741a0e..adf0305eaf17e 100644
--- a/pkg/kubelet/cm/dra/manager_test.go
+++ b/pkg/kubelet/cm/dra/manager_test.go
@@ -41,6 +41,7 @@ import (
     drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
     "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
     "k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
+    "k8s.io/kubernetes/test/utils/ktesting"
 )
 
 const (
@@ -538,6 +539,7 @@ func TestPrepareResources(t *testing.T) {
         },
     } {
         t.Run(test.description, func(t *testing.T) {
+            tCtx := ktesting.Init(t)
             cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
             if err != nil {
                 t.Fatalf("failed to newClaimInfoCache, err:%v", err)
@@ -549,11 +551,11 @@ func TestPrepareResources(t *testing.T) {
             }
 
             if test.claim != nil {
-                if _, err := fakeKubeClient.ResourceV1alpha3().ResourceClaims(test.pod.Namespace).Create(context.Background(), test.claim, metav1.CreateOptions{}); err != nil {
+                if _, err := fakeKubeClient.ResourceV1alpha3().ResourceClaims(test.pod.Namespace).Create(tCtx, test.claim, metav1.CreateOptions{}); err != nil {
                     t.Fatalf("failed to create ResourceClaim %s: %+v", test.claim.Name, err)
                 }
                 defer func() {
-                    require.NoError(t, fakeKubeClient.ResourceV1alpha3().ResourceClaims(test.pod.Namespace).Delete(context.Background(), test.claim.Name, metav1.DeleteOptions{}))
+                    require.NoError(t, fakeKubeClient.ResourceV1alpha3().ResourceClaims(test.pod.Namespace).Delete(tCtx, test.claim.Name, metav1.DeleteOptions{}))
                 }()
             }
 
@@ -579,7 +581,7 @@ func TestPrepareResources(t *testing.T) {
                 manager.cache.add(test.claimInfo)
             }
 
-            err = manager.PrepareResources(test.pod)
+            err = manager.PrepareResources(tCtx, test.pod)
 
             assert.Equal(t, test.expectedPrepareCalls, draServerInfo.server.prepareResourceCalls.Load())
 
@@ -688,6 +690,7 @@ func TestUnprepareResources(t *testing.T) {
         },
     } {
         t.Run(test.description, func(t *testing.T) {
+            tCtx := ktesting.Init(t)
             cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
             if err != nil {
                 t.Fatalf("failed to create a new instance of the claimInfoCache, err: %v", err)
@@ -720,7 +723,7 @@ func TestUnprepareResources(t *testing.T) {
                 manager.cache.add(test.claimInfo)
             }
 
-            err = manager.UnprepareResources(test.pod)
+            err = manager.UnprepareResources(tCtx, test.pod)
 
             assert.Equal(t, test.expectedUnprepareCalls, draServerInfo.server.unprepareResourceCalls.Load())
 
@@ -866,6 +869,8 @@ func TestGetContainerClaimInfos(t *testing.T) {
 // TestParallelPrepareUnprepareResources calls PrepareResources and UnprepareResources APIs in parallel
 // to detect possible data races
 func TestParallelPrepareUnprepareResources(t *testing.T) {
+    tCtx := ktesting.Init(t)
+
     // Setup and register fake DRA driver
     draServerInfo, err := setupFakeDRADriverGRPCServer(false, nil, nil, nil)
     if err != nil {
@@ -934,17 +939,17 @@ func TestParallelPrepareUnprepareResources(t *testing.T) {
             }
             claim := genTestClaim(claimName, driverName, deviceName, string(podUID))
 
-            if _, err = fakeKubeClient.ResourceV1alpha3().ResourceClaims(pod.Namespace).Create(context.Background(), claim, metav1.CreateOptions{}); err != nil {
+            if _, err = fakeKubeClient.ResourceV1alpha3().ResourceClaims(pod.Namespace).Create(tCtx, claim, metav1.CreateOptions{}); err != nil {
                 t.Errorf("failed to create ResourceClaim %s: %+v", claim.Name, err)
                 return
             }
 
-            if err = manager.PrepareResources(pod); err != nil {
+            if err = manager.PrepareResources(tCtx, pod); err != nil {
                 t.Errorf("pod: %s: PrepareResources failed: %+v", pod.Name, err)
                 return
             }
 
-            if err = manager.UnprepareResources(pod); err != nil {
+            if err = manager.UnprepareResources(tCtx, pod); err != nil {
                 t.Errorf("pod: %s: UnprepareResources failed: %+v", pod.Name, err)
                 return
             }
         }
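The tests above replace `context.Background()`/`context.TODO()` with a per-test context from the in-tree `test/utils/ktesting` helper. Roughly the same pattern is available from the public `k8s.io/klog/v2/ktesting` package; the sketch below uses that public API as a stand-in (the in-tree helper wraps more, such as cleanup-driven cancellation), and the manager call is shown only as a hypothetical comment:

```go
package dra_test

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

// TestWithPerTestContext sketches the shape of the change: the test builds a
// context whose logger writes through t.Log, then passes it to the code under
// test instead of context.Background().
func TestWithPerTestContext(t *testing.T) {
	_, tCtx := ktesting.NewTestContext(t)

	// Anything that calls klog.FromContext(tCtx) now logs via the test, so
	// output is attributed to the right test case.
	klog.FromContext(tCtx).Info("running with a per-test logger")

	// e.g. err := manager.PrepareResources(tCtx, pod) // hypothetical call
}
```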
diff --git a/pkg/kubelet/cm/dra/plugin/client_test.go b/pkg/kubelet/cm/dra/plugin/client_test.go
index 97e948a31b5df..d78ed6bd39394 100644
--- a/pkg/kubelet/cm/dra/plugin/client_test.go
+++ b/pkg/kubelet/cm/dra/plugin/client_test.go
@@ -97,7 +97,7 @@ func setupFakeGRPCServer(version string) (string, tearDown, error) {
 }
 
 func TestGRPCConnIsReused(t *testing.T) {
-    ctx := ktesting.Init(t)
+    tCtx := ktesting.Init(t)
     addr, teardown, err := setupFakeGRPCServer(v1alpha4Version)
     if err != nil {
         t.Fatal(err)
     }
@@ -109,7 +109,7 @@ func TestGRPCConnIsReused(t *testing.T) {
 
     m := sync.Mutex{}
     p := &Plugin{
-        backgroundCtx:     ctx,
+        backgroundCtx:     tCtx,
         endpoint:          addr,
         clientCallTimeout: defaultClientCallTimeout,
     }
@@ -149,7 +149,8 @@ func TestGRPCConnIsReused(t *testing.T) {
             },
         },
     }
-    _, err = client.NodePrepareResources(context.TODO(), req)
+
+    _, err = client.NodePrepareResources(tCtx, req)
     assert.NoError(t, err)
 
     client.mutex.Lock()
@@ -237,7 +238,7 @@ func TestNodeUnprepareResources(t *testing.T) {
         },
     } {
         t.Run(test.description, func(t *testing.T) {
-            ctx := ktesting.Init(t)
+            tCtx := ktesting.Init(t)
            addr, teardown, err := setupFakeGRPCServer(test.serverVersion)
             if err != nil {
                 t.Fatal(err)
             }
@@ -245,7 +246,7 @@ func TestNodeUnprepareResources(t *testing.T) {
             defer teardown()
 
             p := &Plugin{
-                backgroundCtx:     ctx,
+                backgroundCtx:     tCtx,
                 endpoint:          addr,
                 clientCallTimeout: defaultClientCallTimeout,
             }
@@ -269,7 +270,7 @@ func TestNodeUnprepareResources(t *testing.T) {
                 t.Fatal(err)
             }
 
-            _, err = client.NodeUnprepareResources(context.TODO(), test.request)
+            _, err = client.NodeUnprepareResources(tCtx, test.request)
             if err != nil {
                 t.Fatal(err)
             }
diff --git a/pkg/kubelet/cm/dra/plugin/plugin.go b/pkg/kubelet/cm/dra/plugin/plugin.go
index fceea7e4794e0..99e577a42599f 100644
--- a/pkg/kubelet/cm/dra/plugin/plugin.go
+++ b/pkg/kubelet/cm/dra/plugin/plugin.go
@@ -20,7 +20,6 @@ import (
     "context"
     "errors"
     "fmt"
-    "strings"
     "time"
 
     v1 "k8s.io/api/core/v1"
@@ -226,8 +225,6 @@ func (h *RegistrationHandler) DeRegisterPlugin(pluginName string) {
 // ValidatePlugin is called by kubelet's plugin watcher upon detection
 // of a new registration socket opened by DRA plugin.
 func (h *RegistrationHandler) ValidatePlugin(pluginName string, endpoint string, versions []string) error {
-    klog.InfoS("Validate DRA plugin", "name", pluginName, "endpoint", endpoint, "versions", strings.Join(versions, ","))
-
     _, err := h.validateVersions(pluginName, versions)
     if err != nil {
         return fmt.Errorf("invalid versions of plugin %s: %w", pluginName, err)
     }
diff --git a/pkg/kubelet/cm/dra/types.go b/pkg/kubelet/cm/dra/types.go
index 6b37d5039b521..266d55f3e624f 100644
--- a/pkg/kubelet/cm/dra/types.go
+++ b/pkg/kubelet/cm/dra/types.go
@@ -17,6 +17,8 @@ limitations under the License.
 package dra
 
 import (
+    "context"
+
     v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/types"
     "k8s.io/kubernetes/pkg/kubelet/config"
@@ -27,14 +29,14 @@ import (
 type Manager interface {
     // Start starts the reconcile loop of the manager.
     // This will ensure that all claims are unprepared even if pods get deleted unexpectedly.
-    Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error
+    Start(ctx context.Context, activePods ActivePodsFunc, sourcesReady config.SourcesReady) error
 
     // PrepareResources prepares resources for a pod.
     // It communicates with the DRA resource plugin to prepare resources.
-    PrepareResources(pod *v1.Pod) error
+    PrepareResources(ctx context.Context, pod *v1.Pod) error
 
     // UnprepareResources calls NodeUnprepareResource GRPC from DRA plugin to unprepare pod resources
-    UnprepareResources(pod *v1.Pod) error
+    UnprepareResources(ctx context.Context, pod *v1.Pod) error
 
     // GetResources gets a ContainerInfo object from the claimInfo cache.
     // This information is used by the caller to update a container config.
diff --git a/pkg/kubelet/cm/fake_container_manager.go b/pkg/kubelet/cm/fake_container_manager.go
index e1c687c935eb1..f3f6c6eb50951 100644
--- a/pkg/kubelet/cm/fake_container_manager.go
+++ b/pkg/kubelet/cm/fake_container_manager.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cm
 
 import (
+    "context"
     "sync"
 
     v1 "k8s.io/api/core/v1"
@@ -52,7 +53,7 @@ func NewFakeContainerManager() *FakeContainerManager {
     }
 }
 
-func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
+func (cm *FakeContainerManager) Start(_ context.Context, _ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
     cm.Lock()
     defer cm.Unlock()
     cm.CalledFunctions = append(cm.CalledFunctions, "Start")
@@ -144,7 +145,7 @@ func (cm *FakeContainerManager) NewPodContainerManager() PodContainerManager {
     return cm.PodContainerManager
 }
 
-func (cm *FakeContainerManager) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
+func (cm *FakeContainerManager) GetResources(ctx context.Context, pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) {
     cm.Lock()
     defer cm.Unlock()
     cm.CalledFunctions = append(cm.CalledFunctions, "GetResources")
@@ -243,11 +244,11 @@ func (cm *FakeContainerManager) GetNodeAllocatableAbsolute() v1.ResourceList {
     return nil
 }
 
-func (cm *FakeContainerManager) PrepareDynamicResources(pod *v1.Pod) error {
+func (cm *FakeContainerManager) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
 
-func (cm *FakeContainerManager) UnprepareDynamicResources(*v1.Pod) error {
+func (cm *FakeContainerManager) UnprepareDynamicResources(context.Context, *v1.Pod) error {
     return nil
 }
 
diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go
index 2397b9f863917..580ee34893a1e 100644
--- a/pkg/kubelet/container/helpers.go
+++ b/pkg/kubelet/container/helpers.go
@@ -62,10 +62,10 @@ type RuntimeHelper interface {
     GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error)
 
     // PrepareDynamicResources prepares resources for a pod.
-    PrepareDynamicResources(pod *v1.Pod) error
+    PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error
 
     // UnprepareDynamicResources unprepares resources for a a pod.
-    UnprepareDynamicResources(pod *v1.Pod) error
+    UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error
 }
 
 // ShouldContainerBeRestarted checks whether a container needs to be restarted.
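Threading the context end to end is not only about logging: the `NodePrepareResources`/`NodeUnprepareResources` gRPC calls earlier in this patch now receive the caller's context, so a timeout or kubelet shutdown can abort an in-flight call to a slow DRA driver. A toy illustration of that behavior (not kubelet code):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// prepare stands in for the SyncPod -> PrepareDynamicResources ->
// NodePrepareResources chain. Because one ctx now flows end to end, a
// caller-side timeout or cancellation aborts the in-flight call.
func prepare(ctx context.Context) error {
	select {
	case <-time.After(2 * time.Second): // pretend the DRA driver is slow
		return nil
	case <-ctx.Done():
		return fmt.Errorf("prepare aborted: %w", ctx.Err())
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()
	fmt.Println(prepare(ctx)) // prints the context.DeadlineExceeded error
}
```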
diff --git a/pkg/kubelet/container/testing/fake_runtime_helper.go b/pkg/kubelet/container/testing/fake_runtime_helper.go
index ea88af9e34ad8..b56ea1f200066 100644
--- a/pkg/kubelet/container/testing/fake_runtime_helper.go
+++ b/pkg/kubelet/container/testing/fake_runtime_helper.go
@@ -107,10 +107,10 @@ func (f *FakeRuntimeHelper) GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtim
     }, nil
 }
 
-func (f *FakeRuntimeHelper) PrepareDynamicResources(pod *v1.Pod) error {
+func (f *FakeRuntimeHelper) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
 
-func (f *FakeRuntimeHelper) UnprepareDynamicResources(pod *v1.Pod) error {
+func (f *FakeRuntimeHelper) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
     return nil
 }
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 6f602eb425c77..188614479ab07 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1561,7 +1561,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
         os.Exit(1)
     }
     // containerManager must start after cAdvisor because it needs filesystem capacity information
-    if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService, kl.supportLocalStorageCapacityIsolation()); err != nil {
+    if err := kl.containerManager.Start(context.TODO(), node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService, kl.supportLocalStorageCapacityIsolation()); err != nil {
         // Fail kubelet and rely on the babysitter to retry starting kubelet.
         klog.ErrorS(err, "Failed to start ContainerManager")
         os.Exit(1)
@@ -2075,7 +2075,7 @@ func (kl *Kubelet) SyncTerminatingPod(_ context.Context, pod *v1.Pod, podStatus
     // and BEFORE the pod status is changed on the API server
     // to avoid race conditions with the resource deallocation code in kubernetes core.
     if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
-        if err := kl.UnprepareDynamicResources(pod); err != nil {
+        if err := kl.UnprepareDynamicResources(ctx, pod); err != nil {
             return err
         }
     }
@@ -3057,14 +3057,14 @@ func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
 
 // PrepareDynamicResources calls the container Manager PrepareDynamicResources API
 // This method implements the RuntimeHelper interface
-func (kl *Kubelet) PrepareDynamicResources(pod *v1.Pod) error {
-    return kl.containerManager.PrepareDynamicResources(pod)
+func (kl *Kubelet) PrepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
+    return kl.containerManager.PrepareDynamicResources(ctx, pod)
 }
 
 // UnprepareDynamicResources calls the container Manager UnprepareDynamicResources API
 // This method implements the RuntimeHelper interface
-func (kl *Kubelet) UnprepareDynamicResources(pod *v1.Pod) error {
-    return kl.containerManager.UnprepareDynamicResources(pod)
+func (kl *Kubelet) UnprepareDynamicResources(ctx context.Context, pod *v1.Pod) error {
+    return kl.containerManager.UnprepareDynamicResources(ctx, pod)
 }
 
 func (kl *Kubelet) warnCgroupV1Usage() {
diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go
index 96a51c08d2c54..6867a4d4a1ff9 100644
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -591,7 +591,7 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
 func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string, imageVolumes kubecontainer.ImageVolumes) (*kubecontainer.RunContainerOptions, func(), error) {
     supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
 
-    opts, err := kl.containerManager.GetResources(pod, container)
+    opts, err := kl.containerManager.GetResources(ctx, pod, container)
     if err != nil {
         return nil, nil, err
     }
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 2c7a8af44f404..78e2ce5f2b59b 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -1138,7 +1138,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 
     // Prepare resources allocated by the Dynammic Resource Allocation feature for the pod
     if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
-        if err := m.runtimeHelper.PrepareDynamicResources(pod); err != nil {
+        if err := m.runtimeHelper.PrepareDynamicResources(ctx, pod); err != nil {
             ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
             if referr != nil {
                 klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))