Skip to content

Commit

Permalink
fix(provider/kubernetes): v1 eventual consistency bug in disable (spi…
Browse files Browse the repository at this point in the history
…nnaker#2321)

It turns out it was possible that we would

1. apply an annotation
2. toggle replica set labels
3. fail to wait for the label change to be applied

Step 3 could happen because the generation of the replica set returned
from the "toggle label" operation was that of the "apply annotation"
operation. The "wait for consistency" step we usually do here was then
skipped, causing us to edit pod labels before the replica set owning them
had finished updating.

This doesn't happen often, but enough to be a real problem.
  • Loading branch information
lwander authored and skim1420 committed Jan 29, 2018
1 parent 02def99 commit 9d77c8b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -498,19 +498,19 @@ class KubernetesApiAdaptor {
}
}

/**
 * Puts a single key/value pair into the metadata annotations of a replica set.
 *
 * NOTE(review): this excerpt is a rendered diff without +/- markers. The
 * `void` signature and the bare `rs.done()` appear to be the removed lines;
 * the `ReplicaSet` signature and `return rs.done()` appear to be the added ones.
 *
 * @param namespace namespace in which the replica set is looked up
 * @param name name of the replica set to edit
 * @param key annotation key to put
 * @param value annotation value to put
 * @return (added form) whatever `rs.done()` yields; presumably the updated
 *         ReplicaSet as reported by the API server -- confirm against the client library
 */
void annotateReplicaSet(String namespace, String name, String key, String value) {
ReplicaSet annotateReplicaSet(String namespace, String name, String key, String value) {
// The client calls run inside exceptionWrapper, tagged with an operation name, a description and the namespace.
exceptionWrapper("replicaSets.annotate", "Annotate replica set $name", namespace) {
def rs = client.extensions().replicaSets().inNamespace(namespace).withName(name).edit()
// Safe navigation: the put is skipped when the annotations map is null.
rs.buildMetadata().annotations?.put(key, value)
rs.done()
return rs.done()
}
}

/**
 * Puts a single key/value pair into the metadata annotations of a replication controller.
 *
 * NOTE(review): this excerpt is a rendered diff without +/- markers. The
 * `void` signature and the bare `rc.done()` appear to be the removed lines;
 * the `ReplicationController` signature and `return rc.done()` appear to be the added ones.
 *
 * @param namespace namespace in which the replication controller is looked up
 * @param name name of the replication controller to edit
 * @param key annotation key to put
 * @param value annotation value to put
 * @return (added form) whatever `rc.done()` yields; presumably the updated
 *         ReplicationController as reported by the API server -- confirm against the client library
 */
void annotateReplicationController(String namespace, String name, String key, String value) {
ReplicationController annotateReplicationController(String namespace, String name, String key, String value) {
// The client calls run inside exceptionWrapper, tagged with an operation name, a description and the namespace.
exceptionWrapper("replicationControllers.annotate", "Annotate replication controller $name", namespace) {
def rc = client.replicationControllers().inNamespace(namespace).withName(name).edit()
// Safe navigation: the put is skipped when the annotations map is null.
rc.buildMetadata().annotations?.put(key, value)
rc.done()
return rc.done()
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,35 +88,45 @@ abstract class AbstractEnableDisableKubernetesAtomicOperation implements AtomicO
def getGeneration = null
def getResource = null
def desired = null
def disableAnnotation = null
if (replicationController) {
credentials.apiAdaptor.annotateReplicationController(namespace, description.serverGroupName, KubernetesUtil.ENABLE_DISABLE_ANNOTATION, action)
desired = credentials.apiAdaptor.toggleReplicationControllerSpecLabels(namespace, description.serverGroupName, services, action)
getGeneration = { ReplicationController rc ->
return rc.metadata.generation
}
getResource = {
return credentials.apiAdaptor.getReplicationController(namespace, description.serverGroupName)
}
disableAnnotation = { ->
return credentials.apiAdaptor.annotateReplicationController(namespace, description.serverGroupName, KubernetesUtil.ENABLE_DISABLE_ANNOTATION, action)
}
} else if (replicaSet) {
credentials.apiAdaptor.annotateReplicaSet(namespace, description.serverGroupName, KubernetesUtil.ENABLE_DISABLE_ANNOTATION, action)
desired = credentials.apiAdaptor.toggleReplicaSetSpecLabels(namespace, description.serverGroupName, services, action)
getGeneration = { ReplicaSet rs ->
return rs.metadata.generation
}
getResource = {
return credentials.apiAdaptor.getReplicaSet(namespace, description.serverGroupName)
}
disableAnnotation = { ->
return credentials.apiAdaptor.annotateReplicaSet(namespace, description.serverGroupName, KubernetesUtil.ENABLE_DISABLE_ANNOTATION, action)
}
} else {
throw new KubernetesOperationException("No replication controller or replica set $description.serverGroupName in $namespace.")
}

if (!credentials.apiAdaptor.blockUntilResourceConsistent(desired, getGeneration, getResource)) {
throw new KubernetesOperationException("Server group failed to reach a consistent state. This is likely a bug with Kubernetes itself.")
throw new KubernetesOperationException("Server group failed to reach a consistent state while waiting for label to be applied. This is likely a bug with Kubernetes itself.")
}

if (!credentials.apiAdaptor.blockUntilResourceConsistent(disableAnnotation(), getGeneration, getResource)) {
throw new KubernetesOperationException("Server group failed to reach a consistent state while waiting for annotation be applied. This is likely a bug with Kubernetes itself.")
}
}

if (!replicationController && !replicaSet )
if (!replicationController && !replicaSet ) {
throw new KubernetesOperationException("No replication controller or replica set $description.serverGroupName in $namespace.")
}

KubernetesV1ServerGroup serverGroup = clusterProviders.getServerGroup(description.account, namespace, description.serverGroupName)
serverGroup.instances.forEach( { instance -> pods.add(instance.getPod())})
Expand Down

0 comments on commit 9d77c8b

Please sign in to comment.