Skip to content

Commit

Permalink
Merge pull request #35038 from sjenning/nfs-nonblock-reader2
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue

kubelet: storage: don't hang kubelet on unresponsive nfs

Fixes #31272 

Currently, due to the nature of nfs, an unresponsive nfs volume in a pod can wedge the kubelet such that additional pods can not be run.

The discussion thus far surrounding this issue was to wrap the `lstat`, the syscall that ends up hanging in uninterruptible sleep, in a goroutine and limiting the number of goroutines that hang to one per-pod per-volume.

However, in my investigation, I found that the callsites that request a listing of the volumes from a particular volume plugin directory don't care anything about the properties provided by the `lstat` call.  They only care about whether or not a directory exists.

Given that constraint, this PR just avoids the `lstat` call by using `Readdirnames()` instead of `ReadDir()` or `ReadDirNoExit()`

### More detail for reviewers
Consider the pod mounted nfs volume at `/var/lib/kubelet/pods/881341b5-9551-11e6-af4c-fa163e815edd/volumes/kubernetes.io~nfs/myvol`.  The kubelet wedges because when we do a `ReadDir()` or `ReadDirNoExit()` it calls `syscall.Lstat` on `myvol` which requires communication with the nfs server.  If the nfs server is unreachable, this call hangs forever.

However, for our code, we only care what about the names of files/directory contained in `kubernetes.io~nfs` directory, not any of the more detailed information the `Lstat` call provides.  Getting the names can be done with `Readdirnames()`, which doesn't need to involve the nfs server.

@pmorie @eparis @ncdc @derekwaynecarr @saad-ali @thockin @vishh @kubernetes/rh-cluster-infra
  • Loading branch information
Kubernetes Submit Queue authored Oct 18, 2016
2 parents 62cc431 + da3683e commit 84aa5f6
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 43 deletions.
13 changes: 3 additions & 10 deletions pkg/kubelet/kubelet_getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,18 +252,11 @@ func (kl *Kubelet) getPodVolumeNameListFromDisk(podUID types.UID) ([]string, err
for _, volumePluginDir := range volumePluginDirs {
volumePluginName := volumePluginDir.Name()
volumePluginPath := path.Join(podVolDir, volumePluginName)
volumeDirs, volumeDirsStatErrs, err := util.ReadDirNoExit(volumePluginPath)
volumeDirs, err := util.ReadDirNoStat(volumePluginPath)
if err != nil {
return volumes, fmt.Errorf("Could not read directory %s: %v", volumePluginPath, err)
}
for i, volumeDir := range volumeDirs {
if volumeDir != nil {
volumes = append(volumes, volumeDir.Name())
continue
}
glog.Errorf("Could not read directory %s: %v", podVolDir, volumeDirsStatErrs[i])

return volumes, err
}
volumes = append(volumes, volumeDirs...)
}
return volumes, nil
}
22 changes: 10 additions & 12 deletions pkg/kubelet/volumemanager/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/util/strings"
Expand Down Expand Up @@ -531,24 +532,21 @@ func getVolumesFromPodDir(podDir string) ([]podVolume, error) {
pluginName := volumeDir.Name()
volumePluginPath := path.Join(volumesDir, pluginName)

volumePluginDirs, err := ioutil.ReadDir(volumePluginPath)
volumePluginDirs, err := util.ReadDirNoStat(volumePluginPath)
if err != nil {
glog.Errorf("Could not read volume plugin directory %q: %v", volumePluginPath, err)
continue
}

unescapePluginName := strings.UnescapeQualifiedNameForDisk(pluginName)
for _, volumeNameDir := range volumePluginDirs {
if volumeNameDir != nil {
volumeName := volumeNameDir.Name()
mountPath := path.Join(volumePluginPath, volumeName)
volumes = append(volumes, podVolume{
podName: volumetypes.UniquePodName(podName),
volumeSpecName: volumeName,
mountPath: mountPath,
pluginName: unescapePluginName,
})
}
for _, volumeName := range volumePluginDirs {
mountPath := path.Join(volumePluginPath, volumeName)
volumes = append(volumes, podVolume{
podName: volumetypes.UniquePodName(podName),
volumeSpecName: volumeName,
mountPath: mountPath,
pluginName: unescapePluginName,
})
}

}
Expand Down
26 changes: 5 additions & 21 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,36 +84,20 @@ func FileExists(filename string) (bool, error) {
return true, nil
}

// borrowed from ioutil.ReadDir
// ReadDir reads the directory named by dirname and returns
// a list of directory entries, minus those with lstat errors
func ReadDirNoExit(dirname string) ([]os.FileInfo, []error, error) {
// ReadDirNoStat returns a string of files/directories contained
// in dirname without calling lstat on them.
func ReadDirNoStat(dirname string) ([]string, error) {
if dirname == "" {
dirname = "."
}

f, err := os.Open(dirname)
if err != nil {
return nil, nil, err
return nil, err
}
defer f.Close()

names, err := f.Readdirnames(-1)
list := make([]os.FileInfo, 0, len(names))
errs := make([]error, 0, len(names))
for _, filename := range names {
fip, lerr := os.Lstat(dirname + "/" + filename)
if os.IsNotExist(lerr) {
// File disappeared between readdir + stat.
// Just treat it as if it didn't exist.
continue
}

list = append(list, fip)
errs = append(errs, lerr)
}

return list, errs, nil
return f.Readdirnames(-1)
}

// IntPtr returns a pointer to an int
Expand Down

0 comments on commit 84aa5f6

Please sign in to comment.