Skip to content

Commit

Permalink
Add logging and event for failed migration
Browse files Browse the repository at this point in the history
Longhorn 3401

Signed-off-by: Eric Weber <eric.weber@suse.com>
  • Loading branch information
ejweber authored and mergify[bot] committed Jul 8, 2024
1 parent c9eef3b commit d4bbbe6
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
2 changes: 2 additions & 0 deletions constant/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,6 @@ const (
EventReasonUpgrade = "Upgrade"

EventReasonRolloutSkippedFmt = "RolloutSkipped: %v %v"

EventReasonMigrationFailed = "MigrationFailed"
)
11 changes: 8 additions & 3 deletions controller/volume_attachment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,6 @@ func (vac *VolumeAttachmentController) handleVolumeMigration(va *longhorn.Volume
// - We no longer know which node it was migrating from or to.
// - We cannot do an "online" migration anyway, because the volume already crashed.
// Now, we cancel the migration and wait to proceed until the volume is again exclusively attached.

if vol.Spec.NodeID == "" {
vol.Spec.MigrationNodeID = ""
return
Expand Down Expand Up @@ -631,7 +630,7 @@ func (vac *VolumeAttachmentController) handleVolumeAttachment(va *longhorn.Volum
return
}

attachmentTicket := selectAttachmentTicketToAttach(va, vol)
attachmentTicket := vac.selectAttachmentTicketToAttach(va, vol)
if attachmentTicket == nil {
return
}
Expand All @@ -642,7 +641,10 @@ func (vac *VolumeAttachmentController) handleVolumeAttachment(va *longhorn.Volum
setAttachmentParameter(attachmentTicket.Parameters, vol)
}

func selectAttachmentTicketToAttach(va *longhorn.VolumeAttachment, vol *longhorn.Volume) *longhorn.AttachmentTicket {
func (vac *VolumeAttachmentController) selectAttachmentTicketToAttach(va *longhorn.VolumeAttachment,
vol *longhorn.Volume) *longhorn.AttachmentTicket {
log := getLoggerForLHVolumeAttachment(vac.logger, va)

ticketCandidates := []*longhorn.AttachmentTicket{}
for _, attachmentTicket := range va.Spec.AttachmentTickets {
if isCSIAttacherTicketOfRegularRWXVolume(attachmentTicket, vol) {
Expand Down Expand Up @@ -675,6 +677,9 @@ func selectAttachmentTicketToAttach(va *longhorn.VolumeAttachment, vol *longhorn
if util.IsMigratableVolume(vol) &&
maxAttacherPriorityLevel == longhorn.AttacherPriorityLevelCSIAttacher &&
len(highPriorityTicketCandidates) > 1 {
// The check is len > 1, but a migration should involve only two tickets, so log just the first two NodeIDs.
log.Warnf("Volume migration between %v and %v failed; detach volume from extra node to resume",
highPriorityTicketCandidates[0].NodeID, highPriorityTicketCandidates[1].NodeID)
return nil
}

Expand Down
9 changes: 8 additions & 1 deletion controller/volume_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3842,6 +3842,13 @@ func (c *VolumeController) processMigration(v *longhorn.Volume, es map[string]*l
return nil // There is nothing to do.
}

// The volume is no longer attached or should no longer be attached. We will clean up the migration below by
// removing the extra engine and replicas. Warn the user.
if v.Spec.NodeID == "" || v.Status.CurrentNodeID == "" {
msg := ("Volume migration failed unexpectedly; detach volume from extra node to resume")
c.eventRecorder.Event(v, corev1.EventTypeWarning, constant.EventReasonMigrationFailed, msg)
}

// This is a migration confirmation. We need to switch the CurrentNodeID to NodeID so that currentEngine becomes
// the migration engine.
if v.Spec.NodeID != "" && v.Status.CurrentNodeID != v.Spec.NodeID {
Expand Down Expand Up @@ -3978,7 +3985,7 @@ func (c *VolumeController) processMigration(v *longhorn.Volume, es map[string]*l
return nil
}

log.Info("volume migration engine is ready")
log.Info("Volume migration engine is ready")
return nil
}

Expand Down

0 comments on commit d4bbbe6

Please sign in to comment.