From 04ad946e8fb5b07a74bd4b41d8432049396306d8 Mon Sep 17 00:00:00 2001 From: Moshe Levi Date: Sun, 19 Mar 2023 09:46:35 +0200 Subject: [PATCH] kubelet dra: lock before getting claimInfo CDIDevices and annotations fields Currently claimInfo CDIDevices and annotations access directly without RLock. This can lead to concurrent read write error. To avoid it we added RLock all before getting the CDIDevices and annotations Signed-off-by: Moshe Levi --- pkg/kubelet/cm/container_manager_linux.go | 2 ++ pkg/kubelet/cm/dra/manager.go | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 02cb34ddcdc37..b2868d9d3682a 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -978,6 +978,7 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C } for _, containerClaimInfo := range containerClaimInfos { var claimResources []*podresourcesapi.ClaimResource + containerClaimInfo.RLock() // TODO: Currently we maintain a list of ClaimResources, each of which contains // a set of CDIDevices from a different kubelet plugin. In the future we may want to // include the name of the kubelet plugin and/or other types of resources that are @@ -989,6 +990,7 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C } claimResources = append(claimResources, &podresourcesapi.ClaimResource{CDIDevices: cdiDevices}) } + containerClaimInfo.RUnlock() containerDynamicResource := podresourcesapi.DynamicResource{ ClassName: containerClaimInfo.ClassName, ClaimName: containerClaimInfo.ClaimName, diff --git a/pkg/kubelet/cm/dra/manager.go b/pkg/kubelet/cm/dra/manager.go index ea171f0b7b60a..efb49d6e95077 100644 --- a/pkg/kubelet/cm/dra/manager.go +++ b/pkg/kubelet/cm/dra/manager.go @@ -202,6 +202,7 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta return nil, fmt.Errorf("unable to get resource for namespace: %s, claim: %s", pod.Namespace, claimName) } + claimInfo.RLock() klog.V(3).InfoS("Add resource annotations", "claim", claimName, "annotations", claimInfo.annotations) annotations = append(annotations, claimInfo.annotations...) for _, devices := range claimInfo.CDIDevices { @@ -209,6 +210,7 @@ func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*Conta cdiDevices = append(cdiDevices, kubecontainer.CDIDevice{Name: device}) } } + claimInfo.RUnlock() } } @@ -282,8 +284,8 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error { resourceHandle.Data) if err != nil { return fmt.Errorf( - "NodeUnprepareResource failed, pod: %s, claim UID: %s, claim name: %s, CDI devices: %s, err: %+v", - pod.Name, claimInfo.ClaimUID, claimInfo.ClaimName, claimInfo.CDIDevices, err) + "NodeUnprepareResource failed, pod: %s, claim UID: %s, claim name: %s, resource handle: %s, err: %+v", + pod.Name, claimInfo.ClaimUID, claimInfo.ClaimName, resourceHandle.Data, err) } klog.V(3).InfoS("NodeUnprepareResource succeeded", "response", response) }