fix(vmclone): delete snapshot and restore after pvc bound
The clone process involves a `vmsnapshot` and a `vmrestore`.
Currently, both are deleted as soon as the restore is finished.
If the source vm has a pvc, the `vmrestore` process creates another pvc
pointing to the volumesnapshot previously created by the `vmsnapshot`.
With the `Immediate` binding mode, the created pvc(s) are bound immediately.
With the `WaitForFirstConsumer` binding mode, the pvc stays in "Pending" state
until a pod (the virt-launcher pod) is created.
In the latter case, when the cloned vm is started, it gets stuck in
"WaitingForVolumeBinding" because the related PVC binding fails: the snapshot
source has already been deleted.
In other words, we cannot delete the snapshot resources until all the pvcs are Bound.

This patch aims to delay the deletion of the snapshot and the restore until we
are sure that all the pvcs are bound.

Signed-off-by: fossedihelm <ffossemo@redhat.com>
fossedihelm committed Dec 15, 2023
1 parent 740f1eb commit a6852f6
Showing 8 changed files with 211 additions and 19 deletions.
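For context, a minimal Go sketch (not part of this commit) of the kind of StorageClass whose `WaitForFirstConsumer` binding mode exposes the bug described above; the class name and provisioner are illustrative:

package example

import (
    storagev1 "k8s.io/api/storage/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// waitForFirstConsumerStorageClass builds a StorageClass whose PVCs stay
// Pending until a consuming pod is scheduled -- the situation in which the
// old cleanup logic deleted the snapshot source too early.
func waitForFirstConsumerStorageClass() *storagev1.StorageClass {
    mode := storagev1.VolumeBindingWaitForFirstConsumer
    return &storagev1.StorageClass{
        ObjectMeta:        metav1.ObjectMeta{Name: "local-wffc"}, // illustrative name
        Provisioner:       "kubernetes.io/no-provisioner",        // common for local volumes
        VolumeBindingMode: &mode,
    }
}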
13 changes: 13 additions & 0 deletions pkg/controller/virtinformers.go
@@ -742,6 +742,19 @@ func GetVirtualMachineCloneInformerIndexers() cache.Indexers {
                 return []string{getkey(vmClone, *vmClone.Status.RestoreName)}, nil
             }
 
             return nil, nil
         },
+        // Gets: restore key. Returns: clones in phase Succeeded
+        string(clonev1alpha1.Succeeded): func(obj interface{}) ([]string, error) {
+            vmClone, ok := obj.(*clonev1alpha1.VirtualMachineClone)
+            if !ok {
+                return nil, unexpectedObjectError
+            }
+
+            if vmClone.Status.Phase == clonev1alpha1.Succeeded && vmClone.Status.RestoreName != nil {
+                return []string{getkey(vmClone, *vmClone.Status.RestoreName)}, nil
+            }
+
+            return nil, nil
+        },
     }
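A hedged sketch of how this new index is meant to be consumed (the actual consumer is handlePVC in clone_base.go, shown later in this diff): given a restore key, look up every Succeeded clone still referencing that restore and requeue it. The helper name here is illustrative.

package example

import (
    "k8s.io/client-go/tools/cache"
    "k8s.io/client-go/util/workqueue"

    clonev1alpha1 "kubevirt.io/api/clone/v1alpha1"
)

// enqueueSucceededClonesWaitingOn requeues every clone that the index above
// mapped to restoreKey, so the controller re-evaluates whether its PVCs are
// bound and cleanup can proceed.
func enqueueSucceededClonesWaitingOn(indexer cache.Indexer, queue workqueue.RateLimitingInterface, restoreKey string) error {
    cloneKeys, err := indexer.IndexKeys(string(clonev1alpha1.Succeeded), restoreKey)
    if err != nil {
        return err
    }
    for _, key := range cloneKeys {
        queue.AddRateLimited(key) // triggers another reconcile pass
    }
    return nil
}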
6 changes: 3 additions & 3 deletions pkg/storage/snapshot/restore.go
@@ -48,7 +48,7 @@ import (
 )
 
 const (
-    restoreNameAnnotation = "restore.kubevirt.io/name"
+    RestoreNameAnnotation = "restore.kubevirt.io/name"
 
     populatedForPVCAnnotation = "cdi.kubevirt.io/storage.populatedFor"
 
@@ -755,7 +755,7 @@ func (t *vmRestoreTarget) createDataVolume(dvt kubevirtv1.DataVolumeTemplateSpec
     if newDataVolume.Annotations == nil {
         newDataVolume.Annotations = make(map[string]string)
     }
-    newDataVolume.Annotations[restoreNameAnnotation] = t.vmRestore.Name
+    newDataVolume.Annotations[RestoreNameAnnotation] = t.vmRestore.Name
 
     if _, err = t.controller.Client.CdiClient().CdiV1beta1().DataVolumes(t.vm.Namespace).Create(context.Background(), newDataVolume, v1.CreateOptions{}); err != nil {
         t.controller.Recorder.Eventf(t.vm, corev1.EventTypeWarning, restoreDataVolumeCreateErrorEvent, "Error creating restore DataVolume %s: %v", newDataVolume.Name, err)
@@ -1065,7 +1065,7 @@ func CreateRestorePVCDefFromVMRestore(vmRestoreName, restorePVCName string, volu
     }
     pvc.Labels[restoreSourceNameLabel] = sourceVmName
     pvc.Labels[restoreSourceNamespaceLabel] = sourceVmNamespace
-    pvc.Annotations[restoreNameAnnotation] = vmRestoreName
+    pvc.Annotations[RestoreNameAnnotation] = vmRestoreName
     return pvc
 }

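The rename above only exports the constant. A minimal sketch (helper name illustrative) of the cross-package use this enables, i.e. mapping a restored PVC back to the VirtualMachineRestore that created it:

package example

import (
    corev1 "k8s.io/api/core/v1"

    "kubevirt.io/kubevirt/pkg/storage/snapshot"
)

// restoreNameOf returns the name of the VirtualMachineRestore that created
// pvc, if any, by reading the now-exported restore.kubevirt.io/name
// annotation.
func restoreNameOf(pvc *corev1.PersistentVolumeClaim) (string, bool) {
    name, ok := pvc.Annotations[snapshot.RestoreNameAnnotation]
    return name, ok
}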
4 changes: 2 additions & 2 deletions pkg/storage/snapshot/restore_base.go
@@ -189,7 +189,7 @@ func (ctrl *VMRestoreController) handleDataVolume(obj interface{}) {
     }
 
     if dv, ok := obj.(*v1beta1.DataVolume); ok {
-        restoreName, ok := dv.Annotations[restoreNameAnnotation]
+        restoreName, ok := dv.Annotations[RestoreNameAnnotation]
         if !ok {
             return
         }
@@ -207,7 +207,7 @@ func (ctrl *VMRestoreController) handlePVC(obj interface{}) {
     }
 
     if pvc, ok := obj.(*corev1.PersistentVolumeClaim); ok {
-        restoreName, ok := pvc.Annotations[restoreNameAnnotation]
+        restoreName, ok := pvc.Annotations[RestoreNameAnnotation]
         if !ok {
             return
         }
2 changes: 1 addition & 1 deletion pkg/virt-controller/watch/application.go
@@ -843,7 +843,7 @@ func (vca *VirtControllerApp) initCloneController() {
     var err error
     recorder := vca.newRecorder(k8sv1.NamespaceAll, "clone-controller")
     vca.vmCloneController, err = clone.NewVmCloneController(
-        vca.clientSet, vca.vmCloneInformer, vca.vmSnapshotInformer, vca.vmRestoreInformer, vca.vmInformer, vca.vmSnapshotContentInformer, recorder,
+        vca.clientSet, vca.vmCloneInformer, vca.vmSnapshotInformer, vca.vmRestoreInformer, vca.vmInformer, vca.vmSnapshotContentInformer, vca.persistentVolumeClaimInformer, recorder,
     )
     if err != nil {
         panic(err)
1 change: 1 addition & 0 deletions pkg/virt-controller/watch/application_test.go
@@ -229,6 +229,7 @@ var _ = Describe("Application", func() {
                 vmRestoreInformer,
                 vmInformer,
                 vmSnapshotContentInformer,
+                pvcInformer,
                 recorder,
             )
 
71 changes: 59 additions & 12 deletions pkg/virt-controller/watch/clone/clone.go
@@ -63,6 +63,7 @@ type syncInfoType struct {
     restoreReady    bool
     targetVMName    string
     targetVMCreated bool
+    pvcBound        bool
 
     isCloneFailing bool
     failEvent      Event
@@ -210,14 +211,25 @@ func (ctrl *VMCloneController) syncSourceVMTargetVM(source *k6tv1.VirtualMachine
             return syncInfo
         }
 
-        syncInfo = ctrl.cleanupSnapshot(vmClone, syncInfo)
-        if syncInfo.toReenqueue() {
-            return syncInfo
-        }
+        fallthrough
 
-        syncInfo = ctrl.cleanupRestore(vmClone, syncInfo)
-        if syncInfo.toReenqueue() {
-            return syncInfo
-        }
+    case clonev1alpha1.Succeeded:
+
+        if vmClone.Status.RestoreName != nil {
+            syncInfo = ctrl.verifyPVCBound(vmClone, syncInfo)
+            if syncInfo.toReenqueue() {
+                return syncInfo
+            }
+
+            syncInfo = ctrl.cleanupRestore(vmClone, syncInfo)
+            if syncInfo.toReenqueue() {
+                return syncInfo
+            }
+
+            syncInfo = ctrl.cleanupSnapshot(vmClone, syncInfo)
+            if syncInfo.toReenqueue() {
+                return syncInfo
+            }
+        }
 
     default:
@@ -329,8 +341,6 @@ func (ctrl *VMCloneController) updateStatus(origClone *clonev1alpha1.VirtualMach
     }
 
     if syncInfo.targetVMCreated {
-        vmClone.Status.SnapshotName = nil
-        vmClone.Status.RestoreName = nil
         assignPhase(clonev1alpha1.Succeeded)
 
     }
@@ -342,6 +352,11 @@
         )
     }
 
+    if syncInfo.pvcBound {
+        vmClone.Status.SnapshotName = nil
+        vmClone.Status.RestoreName = nil
+    }
+
     if !equality.Semantic.DeepEqual(vmClone.Status, origClone.Status) {
         if phaseChanged {
             log.Log.Object(vmClone).Infof("Changing phase to %s", vmClone.Status.Phase)
@@ -439,9 +454,9 @@ func (ctrl *VMCloneController) createRestoreFromVm(vmClone *clonev1alpha1.Virtua
 func (ctrl *VMCloneController) verifyRestoreReady(vmClone *clonev1alpha1.VirtualMachineClone, sourceNamespace string, syncInfo syncInfoType) syncInfoType {
     obj, exists, err := ctrl.restoreInformer.GetStore().GetByKey(getKey(*vmClone.Status.RestoreName, sourceNamespace))
     if !exists {
-        return addErrorToSyncInfo(syncInfo, fmt.Errorf("restore %s is not created yet for clone %s", *vmClone.Status.SnapshotName, vmClone.Name))
+        return addErrorToSyncInfo(syncInfo, fmt.Errorf("restore %s is not created yet for clone %s", *vmClone.Status.RestoreName, vmClone.Name))
     } else if err != nil {
-        return addErrorToSyncInfo(syncInfo, fmt.Errorf("error getting snapshot %s from cache for clone %s: %v", *vmClone.Status.SnapshotName, vmClone.Name, err))
+        return addErrorToSyncInfo(syncInfo, fmt.Errorf("error getting restore %s from cache for clone %s: %v", *vmClone.Status.RestoreName, vmClone.Name, err))
     }
 
     restore := obj.(*snapshotv1alpha1.VirtualMachineRestore)
@@ -467,7 +482,7 @@ func (ctrl *VMCloneController) verifyVmReady(vmClone *clonev1alpha1.VirtualMachi
     if !exists {
         return addErrorToSyncInfo(syncInfo, fmt.Errorf("target VM %s is not created yet for clone %s", targetVMInfo.Name, vmClone.Name))
     } else if err != nil {
-        return addErrorToSyncInfo(syncInfo, fmt.Errorf("error getting VM %s from cache for clone %s: %v", *vmClone.Status.SnapshotName, targetVMInfo.Name, err))
+        return addErrorToSyncInfo(syncInfo, fmt.Errorf("error getting VM %s from cache for clone %s: %v", targetVMInfo.Name, vmClone.Name, err))
     }
 
     ctrl.logAndRecord(vmClone, TargetVMCreated, fmt.Sprintf("created target VM %s for clone %s", targetVMInfo.Name, vmClone.Name))
@@ -476,6 +491,38 @@
     return syncInfo
 }
 
+func (ctrl *VMCloneController) verifyPVCBound(vmClone *clonev1alpha1.VirtualMachineClone, syncInfo syncInfoType) syncInfoType {
+    obj, exists, err := ctrl.restoreInformer.GetStore().GetByKey(getKey(*vmClone.Status.RestoreName, vmClone.Namespace))
+    if !exists {
+        return addErrorToSyncInfo(syncInfo, fmt.Errorf("restore %s is not created yet for clone %s", *vmClone.Status.RestoreName, vmClone.Name))
+    } else if err != nil {
+        return addErrorToSyncInfo(syncInfo, fmt.Errorf("error getting restore %s from cache for clone %s: %v", *vmClone.Status.SnapshotName, vmClone.Name, err))
+    }
+
+    restore := obj.(*snapshotv1alpha1.VirtualMachineRestore)
+    for _, volumeRestore := range restore.Status.Restores {
+        obj, exists, err = ctrl.pvcInformer.GetStore().GetByKey(getKey(volumeRestore.PersistentVolumeClaimName, vmClone.Namespace))
+        if !exists {
+            return addErrorToSyncInfo(syncInfo, fmt.Errorf("PVC %s is not created yet for clone %s", volumeRestore.PersistentVolumeClaimName, vmClone.Name))
+        } else if err != nil {
+            return addErrorToSyncInfo(syncInfo, fmt.Errorf("error getting PVC %s from cache for clone %s: %v", volumeRestore.PersistentVolumeClaimName, vmClone.Name, err))
+        }
+
+        pvc := obj.(*corev1.PersistentVolumeClaim)
+        if pvc.Status.Phase != corev1.ClaimBound {
+            syncInfo.logger.V(defaultVerbosityLevel).Infof("pvc %s for clone %s is not bound yet", pvc.Name, vmClone.Name)
+            syncInfo.needToReenqueue = true
+            return syncInfo
+        }
+    }
+
+    ctrl.logAndRecord(vmClone, PVCBound, fmt.Sprintf("all PVC for clone %s are bound", vmClone.Name))
+    syncInfo.pvcBound = true
+
+    return syncInfo
+
+}
+
 func (ctrl *VMCloneController) cleanupSnapshot(vmClone *clonev1alpha1.VirtualMachineClone, syncInfo syncInfoType) syncInfoType {
     err := ctrl.client.VirtualMachineSnapshot(vmClone.Namespace).Delete(context.Background(), *vmClone.Status.SnapshotName, v1.DeleteOptions{})
     if err != nil && !errors.IsNotFound(err) {
58 changes: 57 additions & 1 deletion pkg/virt-controller/watch/clone/clone_base.go
@@ -4,6 +4,10 @@ import (
     "fmt"
     "time"
 
+    k8scorev1 "k8s.io/api/core/v1"
+
+    "kubevirt.io/kubevirt/pkg/storage/snapshot"
+
     "kubevirt.io/api/clone"
     snapshotv1alpha1 "kubevirt.io/api/snapshot/v1alpha1"
 
@@ -31,6 +35,7 @@ const (
     RestoreCreated  Event = "RestoreCreated"
     RestoreReady    Event = "RestoreReady"
     TargetVMCreated Event = "TargetVMCreated"
+    PVCBound        Event = "PVCBound"
 
     SnapshotDeleted    Event = "SnapshotDeleted"
     SourceDoesNotExist Event = "SourceDoesNotExist"
@@ -43,21 +48,23 @@ type VMCloneController struct {
     restoreInformer         cache.SharedIndexInformer
     vmInformer              cache.SharedIndexInformer
     snapshotContentInformer cache.SharedIndexInformer
+    pvcInformer             cache.SharedIndexInformer
     recorder                record.EventRecorder
 
     vmCloneQueue       workqueue.RateLimitingInterface
     vmStatusUpdater    *status.VMStatusUpdater
     cloneStatusUpdater *status.CloneStatusUpdater
 }
 
-func NewVmCloneController(client kubecli.KubevirtClient, vmCloneInformer, snapshotInformer, restoreInformer, vmInformer, snapshotContentInformer cache.SharedIndexInformer, recorder record.EventRecorder) (*VMCloneController, error) {
+func NewVmCloneController(client kubecli.KubevirtClient, vmCloneInformer, snapshotInformer, restoreInformer, vmInformer, snapshotContentInformer, pvcInformer cache.SharedIndexInformer, recorder record.EventRecorder) (*VMCloneController, error) {
     ctrl := VMCloneController{
         client:                  client,
         vmCloneInformer:         vmCloneInformer,
         snapshotInformer:        snapshotInformer,
         restoreInformer:         restoreInformer,
         vmInformer:              vmInformer,
         snapshotContentInformer: snapshotContentInformer,
+        pvcInformer:             pvcInformer,
         recorder:                recorder,
         vmCloneQueue:            workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "virt-controller-vmclone"),
         vmStatusUpdater:         status.NewVMStatusUpdater(client),
@@ -96,6 +103,18 @@ func NewVmCloneController(client kubecli.KubevirtClient, vmCloneInformer, snapsh
         },
     )
 
+    if err != nil {
+        return nil, err
+    }
+
+    _, err = ctrl.pvcInformer.AddEventHandler(
+        cache.ResourceEventHandlerFuncs{
+            AddFunc:    ctrl.handlePVC,
+            UpdateFunc: func(oldObj, newObj interface{}) { ctrl.handlePVC(newObj) },
+            DeleteFunc: ctrl.handlePVC,
+        },
+    )
+
     if err != nil {
         return nil, err
     }
@@ -193,6 +212,43 @@ func (ctrl *VMCloneController) handleRestore(obj interface{}) {
     }
 }
 
+func (ctrl *VMCloneController) handlePVC(obj interface{}) {
+    if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil {
+        obj = unknown.Obj
+    }
+
+    pvc, ok := obj.(*k8scorev1.PersistentVolumeClaim)
+    if !ok {
+        log.Log.Errorf(unknownTypeErrFmt, "persistentvolumeclaim")
+        return
+    }
+
+    var (
+        restoreName string
+        exists      bool
+    )
+
+    if restoreName, exists = pvc.Annotations[snapshot.RestoreNameAnnotation]; !exists {
+        return
+    }
+
+    if pvc.Status.Phase != k8scorev1.ClaimBound {
+        return
+    }
+
+    restoreKey := getKey(restoreName, pvc.Namespace)
+
+    succeededWaitingKeys, err := ctrl.vmCloneInformer.GetIndexer().IndexKeys(string(clonev1alpha1.Succeeded), restoreKey)
+    if err != nil {
+        log.Log.Object(pvc).Reason(err).Error("cannot get clone succeededWaitingKeys from " + string(clonev1alpha1.Succeeded) + " indexer")
+        return
+    }
+
+    for _, key := range succeededWaitingKeys {
+        ctrl.vmCloneQueue.AddRateLimited(key)
+    }
+}
+
 func (ctrl *VMCloneController) Run(threadiness int, stopCh <-chan struct{}) error {
     defer utilruntime.HandleCrash()
     defer ctrl.vmCloneQueue.ShutDown()
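Taken together, the pieces above close the loop: updateStatus keeps Status.RestoreName and Status.SnapshotName set after the target VM is created, the new informer index tracks Succeeded clones by restore key, handlePVC requeues those clones once a restored PVC becomes Bound, and verifyPVCBound then lets cleanupRestore and cleanupSnapshot run only when every PVC from the restore is Bound.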