Merge pull request kubernetes#52 from mesosphere/sttts-task-lost-during-kubelet-pod-launch

MESOS: scheduler: handle lost task status updates during kubelet pod launch
sttts committed Dec 2, 2015
1 parent e28404b commit 271eeb0
Showing 3 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion contrib/mesos/pkg/executor/executor.go
@@ -700,7 +700,7 @@ waitForRunningPod:
 	k.lock.Lock()
 	defer k.lock.Unlock()
 reportLost:
-	k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
+	k.reportLostTask(driver, taskId, messages.KubeletPodLaunchFailed)
 }
 
 func (k *Executor) _launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) {
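
The executor-side change swaps the generic LaunchTaskFailed message for the more specific KubeletPodLaunchFailed at the point where the executor gives up waiting for the kubelet to launch the pod, so the scheduler can tell a lost-during-launch task apart from other failures. Below is a hedged sketch of that wait-and-report path, assuming only the hunk context above; awaitPodLaunch, podRunning, and launchTimeout are illustrative names, not identifiers from the repository:

	// awaitPodLaunch blocks until the kubelet reports the pod running, or
	// gives up after launchTimeout and reports the task lost with the new,
	// more specific message (hypothetical helper, sketched for illustration).
	func (k *Executor) awaitPodLaunch(driver bindings.ExecutorDriver, taskId string, podRunning <-chan struct{}, launchTimeout time.Duration) {
		select {
		case <-podRunning:
			return // the kubelet launched the pod; nothing to report
		case <-time.After(launchTimeout):
			// fall through: the kubelet didn't manage to launch the pod in time
		}
		k.lock.Lock()
		defer k.lock.Unlock()
		// the distinct message lets the scheduler's reconcileTerminalTask
		// (third file below) recognize this as a lost-during-launch case
		k.reportLostTask(driver, taskId, messages.KubeletPodLaunchFailed)
	}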
1 change: 1 addition & 0 deletions contrib/mesos/pkg/executor/messages/messages.go
@@ -25,6 +25,7 @@ const (
 	ExecutorUnregistered     = "executor-unregistered"
 	ExecutorShutdown         = "executor-shutdown"
 	LaunchTaskFailed         = "launch-task-failed"
+	KubeletPodLaunchFailed   = "kubelet-pod-launch-failed"
 	TaskKilled               = "task-killed"
 	TaskLost                 = "task-lost"
 	UnmarshalTaskDataFailure = "unmarshal-task-data-failure"
4 changes: 3 additions & 1 deletion
@@ -482,13 +482,15 @@ func (k *framework) reconcileTerminalTask(driver bindings.SchedulerDriver, taskS
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) ||
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared) ||
+		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.KubeletPodLaunchFailed) ||
 		(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.TaskKilled && !task.Has(podtask.Deleted))) {
 		//--
 		// pod-task has metadata that refers to:
 		// (1) a task that Mesos no longer knows about, or else
 		// (2) a pod that the Kubelet will never report as "failed"
 		// (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart)
-		// (4) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master)
+		// (4) a pod that the kubeletExecutor reported as lost because the kubelet didn't manage to launch it (in time)
+		// (5) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master)
 		// For now, destroy the pod and hope that there's a replication controller backing it up.
 		// TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
 		pod := &task.Pod
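
On the scheduler side, the new clause widens reconcileTerminalTask's pod-deletion predicate to also cover executor-reported launch failures. For readability, here is a hedged sketch of just the SOURCE_EXECUTOR clauses pulled out as a standalone helper; this extraction is my own, not a function in the repository, and the taskDeleted parameter stands in for !task.Has(podtask.Deleted):

	// executorReportedTerminal mirrors the SOURCE_EXECUTOR clauses of the
	// condition in the diff above (illustrative extraction, not repo code).
	func executorReportedTerminal(ts *mesos.TaskStatus, taskDeleted bool) bool {
		if ts.GetSource() != mesos.TaskStatus_SOURCE_EXECUTOR {
			return false
		}
		switch ts.GetMessage() {
		case messages.ContainersDisappeared, messages.KubeletPodLaunchFailed:
			return true // containers vanished, or the kubelet never launched the pod
		case messages.TaskKilled:
			return !taskDeleted // killed, but the scheduler never asked for it
		}
		return false
	}

Any status matching one of these clauses triggers the same recovery path: the pod is destroyed in the hope that a replication controller recreates it.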
