Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assume volume is detached if node doesn't exist #29485

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions pkg/cloudprovider/providers/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ func (c *Cloud) NodeAddresses(name string) ([]api.NodeAddress, error) {
}
instance, err := c.getInstanceByNodeName(name)
if err != nil {
return nil, err
return nil, fmt.Errorf("getInstanceByNodeName failed for %q with %v", name, err)
}

addresses := []api.NodeAddress{}
Expand Down Expand Up @@ -869,7 +869,7 @@ func (c *Cloud) InstanceID(name string) (string, error) {
}
inst, err := c.getInstanceByNodeName(name)
if err != nil {
return "", err
return "", fmt.Errorf("getInstanceByNodeName failed for %q with %v", name, err)
Copy link
Member

@justinsb justinsb Jul 23, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we have to be really careful with when we return InstanceNotFound. According to the comments on cloudprovider.Instances, we must return InstanceNotFound, but actually that's wrong and it only applies to ExternalID (AFAICT), and you didn't wrap it there. I'll put in a separate PR to fix the comments.

To (try to) be clear: what you've done here is right, though it would be wrong if the docs were right. At least I think so!

}
return "/" + orEmpty(inst.Placement.AvailabilityZone) + "/" + orEmpty(inst.InstanceId), nil
}
Expand All @@ -881,7 +881,7 @@ func (c *Cloud) InstanceType(name string) (string, error) {
}
inst, err := c.getInstanceByNodeName(name)
if err != nil {
return "", err
return "", fmt.Errorf("getInstanceByNodeName failed for %q with %v", name, err)
}
return orEmpty(inst.InstanceType), nil
}
Expand Down Expand Up @@ -1336,7 +1336,7 @@ func (c *Cloud) getAwsInstance(nodeName string) (*awsInstance, error) {
} else {
instance, err := c.getInstanceByNodeName(nodeName)
if err != nil {
return nil, fmt.Errorf("error finding instance %s: %v", nodeName, err)
return nil, err
}

awsInstance = newAWSInstance(c.ec2, instance)
Expand All @@ -1354,7 +1354,7 @@ func (c *Cloud) AttachDisk(diskName string, instanceName string, readOnly bool)

awsInstance, err := c.getAwsInstance(instanceName)
if err != nil {
return "", err
return "", fmt.Errorf("error finding instance %s: %v", instanceName, err)
}

if readOnly {
Expand Down Expand Up @@ -1419,6 +1419,15 @@ func (c *Cloud) DetachDisk(diskName string, instanceName string) (string, error)

awsInstance, err := c.getAwsInstance(instanceName)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// If instance no longer exists, safe to assume volume is not attached.
glog.Warningf(
"Instance %q does not exist. DetachDisk will assume disk %q is not attached to it.",
instanceName,
diskName)
return "", nil
}

return "", err
}

Expand Down Expand Up @@ -1562,6 +1571,15 @@ func (c *Cloud) GetDiskPath(volumeName string) (string, error) {
func (c *Cloud) DiskIsAttached(diskName, instanceID string) (bool, error) {
awsInstance, err := c.getAwsInstance(instanceID)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// If instance no longer exists, safe to assume volume is not attached.
glog.Warningf(
"Instance %q does not exist. DiskIsAttached will assume disk %q is not attached to it.",
instanceID,
diskName)
return false, nil
}

return false, err
}

Expand Down Expand Up @@ -2918,7 +2936,7 @@ func (c *Cloud) findInstanceByNodeName(nodeName string) (*ec2.Instance, error) {
// getInstanceByNodeName looks up the EC2 instance for nodeName via
// findInstanceByNodeName and turns a "no error, no instance" result into an
// error, so callers never receive a nil instance together with a nil error.
// Any error returned by findInstanceByNodeName is passed through unchanged.
func (c *Cloud) getInstanceByNodeName(nodeName string) (*ec2.Instance, error) {
instance, err := c.findInstanceByNodeName(nodeName)
if err == nil && instance == nil {
// NOTE(review): two consecutive returns. This text looks like a rendered
// diff in which the fmt.Errorf line is the removed version and the
// cloudprovider.InstanceNotFound line is its replacement; confirm which
// one the real file contains. As written, only the first return can run.
return nil, fmt.Errorf("no instances found for name: %s", nodeName)
return nil, cloudprovider.InstanceNotFound
}
return instance, err
}
Expand Down
18 changes: 18 additions & 0 deletions pkg/cloudprovider/providers/gce/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -2367,6 +2367,15 @@ func (gce *GCECloud) AttachDisk(diskName, instanceID string, readOnly bool) erro
func (gce *GCECloud) DetachDisk(devicePath, instanceID string) error {
inst, err := gce.getInstanceByName(instanceID)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// If instance no longer exists, safe to assume volume is not attached.
glog.Warningf(
"Instance %q does not exist. DetachDisk will assume PD %q is not attached to it.",
instanceID,
devicePath)
return nil
}

return fmt.Errorf("error getting instance %q", instanceID)
}

Expand All @@ -2381,6 +2390,15 @@ func (gce *GCECloud) DetachDisk(devicePath, instanceID string) error {
func (gce *GCECloud) DiskIsAttached(diskName, instanceID string) (bool, error) {
instance, err := gce.getInstanceByName(instanceID)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// If instance no longer exists, safe to assume volume is not attached.
glog.Warningf(
"Instance %q does not exist. DiskIsAttached will assume PD %q is not attached to it.",
instanceID,
diskName)
return false, nil
}

return false, err
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@ func (nsu *nodeStatusUpdater) UpdateNodeStatuses() error {
for nodeName, attachedVolumes := range nodesToUpdate {
nodeObj, exists, err := nsu.nodeInformer.GetStore().GetByKey(nodeName)
if nodeObj == nil || !exists || err != nil {
return fmt.Errorf(
"failed to find node %q in NodeInformer cache. %v",
// If the node does not exist, its status cannot be updated: log a warning and return without an error.
glog.Warningf(
"Could not update node status. Failed to find node %q in NodeInformer cache. %v",
nodeName,
err)
return nil
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be continuing (as per the comment), rather than returning from the whole func?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behavior is correct. Perhaps the comment can be clarified. "Move on" in this case means abandon trying to update the status of a node (since it no longer exists).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, wasn't sure if it was intending to also short-circuit and not update any of the other remaining nodes, or if a bad nodeName could permanently block ever getting to update those nodes' status

}

node, ok := nodeObj.(*api.Node)
Expand Down