From 083f29158f92d7cecf5f04704269a33a5037d52c Mon Sep 17 00:00:00 2001 From: Jeff Lowdermilk Date: Thu, 5 Mar 2015 12:04:00 -0800 Subject: [PATCH] Make e2e pod start timeouts uniform --- test/e2e/events.go | 4 +-- test/e2e/kubectl.go | 3 +- test/e2e/pd.go | 14 +++----- test/e2e/pods.go | 14 +++----- test/e2e/rc.go | 2 +- test/e2e/secrets.go | 2 +- test/e2e/service.go | 6 ++-- test/e2e/util.go | 80 +++++++++++++++++++++------------------------ 8 files changed, 54 insertions(+), 71 deletions(-) diff --git a/test/e2e/events.go b/test/e2e/events.go index 098a51fea3f73..bbb318dbc068f 100644 --- a/test/e2e/events.go +++ b/test/e2e/events.go @@ -79,9 +79,7 @@ var _ = Describe("Events", func() { Failf("Failed to create pod: %v", err) } - By("waiting for the pod to start running") - err := waitForPodRunning(c, pod.Name, 300*time.Second) - Expect(err).NotTo(HaveOccurred()) + expectNoError(waitForPodRunning(c, pod.Name)) By("verifying the pod is in kubernetes") pods, err := podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))) diff --git a/test/e2e/kubectl.go b/test/e2e/kubectl.go index e36d859da707b..82d256cf41b02 100644 --- a/test/e2e/kubectl.go +++ b/test/e2e/kubectl.go @@ -35,7 +35,6 @@ const ( kittenImage = "kubernetes/update-demo:kitten" updateDemoSelector = "name=update-demo" updateDemoContainer = "update-demo" - validateTimeout = 10 * time.Minute // TODO: Make this 30 seconds once #4566 is resolved. kubectlProxyPort = 8011 ) @@ -120,7 +119,7 @@ func validateController(c *client.Client, image string, replicas int) { getImageTemplate := fmt.Sprintf(`--template={{(index .currentState.info "%s").image}}`, updateDemoContainer) By(fmt.Sprintf("waiting for all containers in %s pods to come up.", updateDemoSelector)) - for start := time.Now(); time.Since(start) < validateTimeout; time.Sleep(5 * time.Second) { + for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) { getPodsOutput := runKubectl("get", "pods", "-o", "template", getPodsTemplate, "-l", updateDemoSelector) pods := strings.Fields(getPodsOutput) if numPods := len(pods); numPods != replicas { diff --git a/test/e2e/pd.go b/test/e2e/pd.go index 02343de55dba1..625a62c554be3 100644 --- a/test/e2e/pd.go +++ b/test/e2e/pd.go @@ -76,8 +76,7 @@ var _ = Describe("PD", func() { _, err := podClient.Create(host0Pod) expectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err)) - By("waiting up to 180 seconds for host0Pod to start running") - expectNoError(waitForPodRunning(c, host0Pod.Name, 180*time.Second), "host0Pod not running after 180 seconds") + expectNoError(waitForPodRunning(c, host0Pod.Name)) By("deleting host0Pod") expectNoError(podClient.Delete(host0Pod.Name), "Failed to delete host0Pod") @@ -86,8 +85,7 @@ var _ = Describe("PD", func() { _, err = podClient.Create(host1Pod) expectNoError(err, "Failed to create host1Pod") - By("waiting up to 180 seconds for host1Pod to start running") - expectNoError(waitForPodRunning(c, host1Pod.Name, 180*time.Second), "host1Pod not running after 180 seconds") + expectNoError(waitForPodRunning(c, host1Pod.Name)) By("deleting host1Pod") expectNoError(podClient.Delete(host1Pod.Name), "Failed to delete host1Pod") @@ -128,7 +126,7 @@ var _ = Describe("PD", func() { By("submitting rwPod to ensure PD is formatted") _, err := podClient.Create(rwPod) expectNoError(err, "Failed to create rwPod") - expectNoError(waitForPodRunning(c, rwPod.Name, 180*time.Second), "rwPod not running after 180 seconds") + expectNoError(waitForPodRunning(c, rwPod.Name)) expectNoError(podClient.Delete(rwPod.Name), "Failed to delete host0Pod") By("submitting host0ROPod to kubernetes") @@ -139,11 +137,9 @@ var _ = Describe("PD", func() { _, err = podClient.Create(host1ROPod) expectNoError(err, "Failed to create host1ROPod") - By("waiting up to 180 seconds for host0ROPod to start running") - expectNoError(waitForPodRunning(c, host0ROPod.Name, 180*time.Second), "host0ROPod not running after 180 seconds") + expectNoError(waitForPodRunning(c, host0ROPod.Name)) - By("waiting up to 180 seconds for host1ROPod to start running") - expectNoError(waitForPodRunning(c, host1ROPod.Name, 180*time.Second), "host1ROPod not running after 180 seconds") + expectNoError(waitForPodRunning(c, host1ROPod.Name)) By("deleting host0ROPod") expectNoError(podClient.Delete(host0ROPod.Name), "Failed to delete host0ROPod") diff --git a/test/e2e/pods.go b/test/e2e/pods.go index 89e8f8f3cf2bf..3dca34a6abb07 100644 --- a/test/e2e/pods.go +++ b/test/e2e/pods.go @@ -47,8 +47,7 @@ func runLivenessTest(c *client.Client, podDescr *api.Pod) { // Wait until the pod is not pending. (Here we need to check for something other than // 'Pending' other than checking for 'Running', since when failures occur, we go to // 'Terminated' which can cause indefinite blocking.) - By("waiting for the pod to be something other than pending") - expectNoError(waitForPodNotPending(c, ns, podDescr.Name, 60*time.Second), + expectNoError(waitForPodNotPending(c, ns, podDescr.Name), fmt.Sprintf("starting pod %s in namespace %s", podDescr.Name, ns)) By(fmt.Sprintf("Started pod %s in namespace %s", podDescr.Name, ns)) @@ -190,8 +189,7 @@ var _ = Describe("Pods", func() { Fail(fmt.Sprintf("Failed to create pod: %v", err)) } - By("waiting for the pod to start running") - expectNoError(waitForPodRunning(c, pod.Name, 300*time.Second)) + expectNoError(waitForPodRunning(c, pod.Name)) By("verifying the pod is in kubernetes") pods, err := podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))) @@ -213,8 +211,7 @@ var _ = Describe("Pods", func() { Fail(fmt.Sprintf("Failed to update pod: %v", err)) } - By("waiting for the updated pod to start running") - expectNoError(waitForPodRunning(c, pod.Name, 300*time.Second)) + expectNoError(waitForPodRunning(c, pod.Name)) By("verifying the updated pod is in kubernetes") pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))) @@ -246,7 +243,7 @@ var _ = Describe("Pods", func() { if err != nil { Fail(fmt.Sprintf("Failed to create serverPod: %v", err)) } - expectNoError(waitForPodRunning(c, serverPod.Name, 300*time.Second)) + expectNoError(waitForPodRunning(c, serverPod.Name)) // This service exposes port 8080 of the test pod as a service on port 8765 // TODO(filbranden): We would like to use a unique service name such as: @@ -305,8 +302,7 @@ var _ = Describe("Pods", func() { Fail(fmt.Sprintf("Failed to create pod: %v", err)) } - // Wait for client pod to complete. - expectNoError(waitForPodRunning(c, clientPod.Name, 60*time.Second)) + expectNoError(waitForPodRunning(c, clientPod.Name)) // Grab its logs. Get host first. clientPodStatus, err := c.Pods(api.NamespaceDefault).Get(clientPod.Name) diff --git a/test/e2e/rc.go b/test/e2e/rc.go index d0da049edbcf4..cf67dd62edc2d 100644 --- a/test/e2e/rc.go +++ b/test/e2e/rc.go @@ -133,7 +133,7 @@ func ServeImageOrFail(c *client.Client, test string, image string) { // Wait for the pods to enter the running state. Waiting loops until the pods // are running so non-running pods cause a timeout for this test. for _, pod := range pods.Items { - err = waitForPodRunning(c, pod.Name, 300*time.Second) + err = waitForPodRunning(c, pod.Name) Expect(err).NotTo(HaveOccurred()) } diff --git a/test/e2e/secrets.go b/test/e2e/secrets.go index bf2e6f749b501..8196ae0e6ea1b 100644 --- a/test/e2e/secrets.go +++ b/test/e2e/secrets.go @@ -115,7 +115,7 @@ var _ = Describe("Secrets", func() { Failf("Failed to create pod: %v", err) } // Wait for client pod to complete. - expectNoError(waitForPodRunning(c, clientPod.Name, 60*time.Second)) + expectNoError(waitForPodRunning(c, clientPod.Name)) // Grab its logs. Get host first. clientPodStatus, err := c.Pods(ns).Get(clientPod.Name) diff --git a/test/e2e/service.go b/test/e2e/service.go index 406efba9ec154..22e566929e08c 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -116,12 +116,10 @@ var _ = Describe("Services", func() { Failf("Failed to create %s pod: %v", pod.Name, err) } - By("waiting for the pod to start running") - err := waitForPodRunning(c, pod.Name, 300*time.Second) - Expect(err).NotTo(HaveOccurred()) + expectNoError(waitForPodRunning(c, pod.Name)) By("retrieving the pod") - pod, err = podClient.Get(pod.Name) + pod, err := podClient.Get(pod.Name) if err != nil { Failf("Failed to get pod %s: %v", pod.Name, err) } diff --git a/test/e2e/util.go b/test/e2e/util.go index 043a2137446e0..22f86cdd0686f 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -31,6 +31,12 @@ import ( . "github.com/onsi/gomega" ) +const ( + // Initial pod start can be delayed O(minutes) by slow docker pulls + // TODO: Make this 30 seconds once #4566 is resolved. + podStartTimeout = 5 * time.Minute +) + type testContextType struct { authConfig string certDir string @@ -50,55 +56,45 @@ func Failf(format string, a ...interface{}) { Fail(fmt.Sprintf(format, a...), 1) } -func waitForPodRunning(c *client.Client, id string, tryFor time.Duration) error { - trySecs := int(tryFor.Seconds()) - for i := 0; i <= trySecs; i += 5 { - time.Sleep(5 * time.Second) - pod, err := c.Pods(api.NamespaceDefault).Get(id) +type podCondition func(pod *api.Pod) (bool, error) + +func waitForPodCondition(c *client.Client, ns, podName, desc string, condition podCondition) error { + By(fmt.Sprintf("waiting up to %v for pod %s status to be %s", podStartTimeout, podName, desc)) + for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) { + pod, err := c.Pods(ns).Get(podName) if err != nil { - return fmt.Errorf("Get pod %s failed: %v", id, err.Error()) + Logf("Get pod failed, ignoring for 5s: %v", err) + continue } - if pod.Status.Phase == api.PodRunning { - return nil + done, err := condition(pod) + if done { + return err } - Logf("Waiting for pod %s status to be %q (found %q) (%d secs)", id, api.PodRunning, pod.Status.Phase, i) + Logf("Waiting for pod %s status to be %q (found %q) (%d secs)", podName, api.PodRunning, pod.Status.Phase, time.Since(start).Seconds()) } - return fmt.Errorf("Gave up waiting for pod %s to be running after %d seconds", id, trySecs) + return fmt.Errorf("gave up waiting for pod %s to be %s after %v", podName, podStartTimeout) } -// waitForPodNotPending returns false if it took too long for the pod to go out of pending state. -func waitForPodNotPending(c *client.Client, ns, podName string, tryFor time.Duration) error { - trySecs := int(tryFor.Seconds()) - for i := 0; i <= trySecs; i += 5 { - if i > 0 { - time.Sleep(5 * time.Second) - } - pod, err := c.Pods(ns).Get(podName) - if err != nil { - Logf("Get pod %s in namespace %s failed, ignoring for 5s: %v", podName, ns, err) - continue - } +func waitForPodRunning(c *client.Client, podName string) error { + return waitForPodCondition(c, api.NamespaceDefault, podName, "running", func(pod *api.Pod) (bool, error) { + return (pod.Status.Phase == api.PodRunning), nil + }) +} + +// waitForPodNotPending returns an error if it took too long for the pod to go out of pending state. +func waitForPodNotPending(c *client.Client, ns, podName string) error { + return waitForPodCondition(c, ns, podName, "!pending", func(pod *api.Pod) (bool, error) { if pod.Status.Phase != api.PodPending { Logf("Saw pod %s in namespace %s out of pending state (found %q)", podName, ns, pod.Status.Phase) - return nil + return true, nil } - Logf("Waiting for status of pod %s in namespace %s to be !%q (found %q) (%v secs)", podName, ns, api.PodPending, pod.Status.Phase, i) - } - return fmt.Errorf("Gave up waiting for status of pod %s in namespace %s to go out of pending after %d seconds", podName, ns, trySecs) + return false, nil + }) } -// waitForPodSuccess returns true if the pod reached state success, or false if it reached failure or ran too long. -func waitForPodSuccess(c *client.Client, podName string, contName string, tryFor time.Duration) error { - trySecs := int(tryFor.Seconds()) - for i := 0; i <= trySecs; i += 5 { - if i > 0 { - time.Sleep(5 * time.Second) - } - pod, err := c.Pods(api.NamespaceDefault).Get(podName) - if err != nil { - Logf("Get pod failed, ignoring for 5s: %v", err) - continue - } +// waitForPodSuccess returns nil if the pod reached state success, or an error if it reached failure or ran too long. +func waitForPodSuccess(c *client.Client, podName string, contName string) error { + return waitForPodCondition(c, api.NamespaceDefault, podName, "success or failure", func(pod *api.Pod) (bool, error) { // Cannot use pod.Status.Phase == api.PodSucceeded/api.PodFailed due to #2632 ci, ok := pod.Status.Info[contName] if !ok { @@ -107,17 +103,17 @@ func waitForPodSuccess(c *client.Client, podName string, contName string, tryFor if ci.State.Termination != nil { if ci.State.Termination.ExitCode == 0 { By("Saw pod success") - return nil + return true, nil } else { - Logf("Saw pod failure: %+v", ci.State.Termination) + return true, fmt.Errorf("pod %s terminated with failure: %+v", podName, ci.State.Termination) } Logf("Waiting for pod %q status to be success or failure", podName) } else { Logf("Nil State.Termination for container %s in pod %s so far", contName, podName) } } - } - return fmt.Errorf("Gave up waiting for pod %q status to be success or failure after %d seconds", podName, trySecs) + return false, nil + }) } func loadConfig() (*client.Config, error) {