Skip to content

Commit

Permalink
Merge pull request kubernetes#1899 from gnufied/backport-node-resize-…
Browse files Browse the repository at this point in the history
…no-ocp-master

OCPBUGS-10996: Fix race condition between resizer and kubelet
  • Loading branch information
openshift-merge-bot[bot] authored Feb 27, 2024
2 parents 2eba5a9 + 1a9c833 commit 6116860
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 3 deletions.
10 changes: 10 additions & 0 deletions pkg/kubelet/volumemanager/reconciler/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1279,6 +1279,16 @@ func Test_Run_Positive_VolumeFSResizeControllerAttachEnabled(t *testing.T) {
newPVSize: resource.MustParse("15G"),
oldPVSize: resource.MustParse("13G"),
},
{
name: "expand-fs-volume with unsupported error",
volumeMode: &fsMode,
expansionFailed: false,
pvName: volumetesting.FailWithUnSupportedVolumeName,
pvcSize: resource.MustParse("10G"),
pvcStatusSize: resource.MustParse("10G"),
newPVSize: resource.MustParse("15G"),
oldPVSize: resource.MustParse("13G"),
},
}

for _, tc := range tests {
Expand Down
2 changes: 1 addition & 1 deletion pkg/volume/csi/expander.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (c *csiPlugin) nodeExpandWithClient(
}

if !nodeExpandSet {
return false, fmt.Errorf("Expander.NodeExpand found CSI plugin %s/%s to not support node expansion", c.GetPluginName(), driverName)
return false, volumetypes.NewOperationNotSupportedError(fmt.Sprintf("NodeExpand is not supported by the CSI driver %s", driverName))
}

pv := resizeOptions.VolumeSpec.PersistentVolume
Expand Down
9 changes: 7 additions & 2 deletions pkg/volume/testing/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ const (
SuccessAndFailOnMountDeviceName = "success-and-failed-mount-device-name"

// FailWithInUseVolumeName will cause NodeExpandVolume to result in FailedPrecondition error
FailWithInUseVolumeName = "fail-expansion-in-use"
FailWithInUseVolumeName = "fail-expansion-in-use"
FailWithUnSupportedVolumeName = "fail-expansion-unsupported"

FailVolumeExpansion = "fail-expansion-test"

Expand Down Expand Up @@ -500,8 +501,12 @@ func (plugin *FakeVolumePlugin) NodeExpand(resizeOptions volume.NodeResizeOption
if resizeOptions.VolumeSpec.Name() == FailWithInUseVolumeName {
return false, volumetypes.NewFailedPreconditionError("volume-in-use")
}
if resizeOptions.VolumeSpec.Name() == FailWithUnSupportedVolumeName {
return false, volumetypes.NewOperationNotSupportedError("volume-unsupported")
}

if resizeOptions.VolumeSpec.Name() == AlwaysFailNodeExpansion {
return false, fmt.Errorf("Test failure: NodeExpand")
return false, fmt.Errorf("test failure: NodeExpand")
}

if resizeOptions.VolumeSpec.Name() == FailVolumeExpansion {
Expand Down
8 changes: 8 additions & 0 deletions pkg/volume/util/operationexecutor/operation_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2219,6 +2219,14 @@ func (og *operationGenerator) legacyCallNodeExpandOnPlugin(resizeOp nodeResizeOp

_, resizeErr := expandableVolumePlugin.NodeExpand(rsOpts)
if resizeErr != nil {
// This is a workaround for now, until the RecoverFromVolumeExpansionFailure feature goes GA.
// If the RecoverFromVolumeExpansionFailure feature is enabled, we will never hit this state, because
// we will wait for VolumeExpansionPendingOnNode before trying to expand the volume in kubelet.
if volumetypes.IsOperationNotSupportedError(resizeErr) {
klog.V(4).InfoS(volumeToMount.GenerateMsgDetailed("MountVolume.NodeExpandVolume failed", "NodeExpandVolume not supported"), "pod", klog.KObj(volumeToMount.Pod))
return true, nil
}

// if driver returned FailedPrecondition error that means
// volume expansion should not be retried on this node but
// expansion operation should not block mounting
Expand Down
17 changes: 17 additions & 0 deletions pkg/volume/util/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,23 @@ func IsFailedPreconditionError(err error) bool {
return errors.As(err, &failedPreconditionError)
}

// OperationNotSupported is an error type used to signal that a requested
// operation is not supported. It carries only a human-readable message.
type OperationNotSupported struct {
	msg string
}

// Error implements the error interface and returns the stored message.
func (e *OperationNotSupported) Error() string {
	return e.msg
}

// NewOperationNotSupportedError returns a new *OperationNotSupported whose
// Error() output is msg.
func NewOperationNotSupportedError(msg string) *OperationNotSupported {
	notSupported := new(OperationNotSupported)
	notSupported.msg = msg
	return notSupported
}

// IsOperationNotSupportedError reports whether err, or any error in its
// Unwrap chain, is an *OperationNotSupported.
func IsOperationNotSupportedError(err error) bool {
	var target *OperationNotSupported
	if errors.As(err, &target) {
		return true
	}
	return false
}

// TransientOperationFailure indicates operation failed with a transient error
// and may fix itself when retried.
type TransientOperationFailure struct {
Expand Down

0 comments on commit 6116860

Please sign in to comment.