Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--wait, wait for more components #7375

Merged
merged 20 commits into from
Apr 4, 2020
Prev Previous commit
Next Next commit
refactor and rebase
  • Loading branch information
medyagh committed Apr 3, 2020
commit 62ca22a66297b51e9007503becf35477d9c5fad3
20 changes: 10 additions & 10 deletions cmd/minikube/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func initMinikubeFlags() {
startCmd.Flags().String(criSocket, "", "The cri socket path to be used.")
startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.")
startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".")
startCmd.Flags().StringSlice(waitComponents, kverify.DefaultWaitsKeys, fmt.Sprintf("comma separated list of kuberentes components to verify and wait for after starting a cluster. defaults to %q, available options: %q . other acceptable values are 'all' or 'none', 'true' and 'false'", strings.Join(kverify.DefaultWaitsKeys, ","), strings.Join(kverify.AllValidWaitsList, ",")))
startCmd.Flags().StringSlice(waitComponents, kverify.DefaultWaitList, fmt.Sprintf("comma separated list of kuberentes components to verify and wait for after starting a cluster. defaults to %q, available options: %q . other acceptable values are 'all' or 'none', 'true' and 'false'", strings.Join(kverify.DefaultWaitList, ","), strings.Join(kverify.AllValidWaitList, ",")))
startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.")
startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.")
startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.")
Expand Down Expand Up @@ -1207,39 +1207,39 @@ func getKubernetesVersion(old *config.ClusterConfig) string {
// returns map of components to wait for
func interpretWaitFlag(cmd cobra.Command) map[string]bool {
if !cmd.Flags().Changed(waitComponents) {
glog.Infof("Wait Components : %+v", kverify.DefaultWaits)
return kverify.DefaultWaits
glog.Infof("Wait Components : %+v", kverify.DefaultWaitComponents)
return kverify.DefaultWaitComponents
}

waitFlags, err := cmd.Flags().GetStringSlice(waitComponents)
if err != nil {
glog.Infof("failed to get wait from flags, will use default wait components : %+v", kverify.DefaultWaits)
return kverify.DefaultWaits
glog.Infof("failed to get wait from flags, will use default wait components : %+v", kverify.DefaultWaitComponents)
return kverify.DefaultWaitComponents
}

// before minikube 1.9.0, wait flag was boolean
if (len(waitFlags) == 1 && waitFlags[0] == "true") || (len(waitFlags) == 1 && waitFlags[0] == "all") {
return kverify.AllWaitsCompos
return kverify.AllWaitComponents
}

// respecting legacy flag format --wait=false
// before minikube 1.9.0, wait flag was boolean
if (len(waitFlags) == 1 && waitFlags[0] == "false") || len(waitFlags) == 1 && waitFlags[0] == "none" {
medyagh marked this conversation as resolved.
Show resolved Hide resolved
return kverify.NoWaitsCompos
return kverify.NoWaitComponents
}

waitCompos := kverify.NoWaitsCompos
waitCompos := kverify.NoWaitComponents
for _, wc := range waitFlags {
seen := false
for _, valid := range kverify.AllValidWaitsList {
for _, valid := range kverify.AllValidWaitList {
if wc == valid {
waitCompos[wc] = true
seen = true
continue
}
}
if !seen {
glog.Warningf("The value %q is invalid for --wait flag. valid options are %q", wc, strings.Join(kverify.AllValidWaitsList, ","))
glog.Warningf("The value %q is invalid for --wait flag. valid options are %q", wc, strings.Join(kverify.AllValidWaitList, ","))
}
}
return waitCompos
Expand Down
106 changes: 53 additions & 53 deletions pkg/minikube/bootstrapper/bsutil/kverify/api_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,44 @@ import (
"k8s.io/minikube/pkg/minikube/cruntime"
)

// WaitForAPIServerProcess polls every 500ms until apiServerPID finds an
// apiserver process through cr. It only checks that the process exists; it
// does not check that the apiserver is healthy.
//
// r, bs and cfg are only forwarded to announceProblems. start is the reference
// time for both the timeout and the log check, so it may predate this call.
// Once more than minLogCheckTime has passed since start, every poll first calls
// announceProblems and then sleeps for five times kconst.APICallRetryInterval.
//
// It returns nil once a PID was found, and an error wrapping the poll failure
// otherwise.
func WaitForAPIServerProcess(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, start time.Time, timeout time.Duration) error {
	glog.Infof("waiting for apiserver process to appear ...")
	err := wait.PollImmediate(time.Millisecond*500, timeout, func() (bool, error) {
		// The poller counts timeout from its own start; this check enforces
		// it relative to the caller supplied start time as well.
		if time.Since(start) > timeout {
			return false, fmt.Errorf("cluster wait timed out during process check")
		}

		if time.Since(start) > minLogCheckTime {
			announceProblems(r, bs, cfg, cr)
			time.Sleep(kconst.APICallRetryInterval * 5)
		}

		// A lookup failure means "not there yet", so keep polling instead of
		// aborting the wait.
		if _, ierr := apiServerPID(cr); ierr != nil {
			return false, nil
		}

		return true, nil
	})
	if err != nil {
		// Keep the cause so callers and logs can tell which deadline fired.
		return fmt.Errorf("apiserver process never appeared: %w", err)
	}
	glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start))
	return nil
}

// apiServerPID runs pgrep through cr and parses its trimmed standard output as
// the apiserver process ID. It is a best guess based on the command line
// pattern. It returns an error when the command fails or when its output is
// not a single integer.
func apiServerPID(cr command.Runner) (int, error) {
	pgrep := exec.Command("sudo", "pgrep", "-xnf", "kube-apiserver.*minikube.*")
	result, err := cr.RunCmd(pgrep)
	if err != nil {
		return 0, err
	}
	pidText := strings.TrimSpace(result.Stdout.String())
	return strconv.Atoi(pidText)
}

// WaitForHealthyAPIServer waits for api server status to be running
func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, ip string, port int, timeout time.Duration) error {
func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, hostname string, port int, timeout time.Duration) error {
glog.Infof("waiting for apiserver healthz status ...")
hStart := time.Now()

Expand All @@ -56,7 +92,7 @@ func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, c
time.Sleep(kconst.APICallRetryInterval * 5)
}

status, err := apiServerHealthz(net.ParseIP(ip), port)
status, err := apiServerHealthz(hostname, port)
if err != nil {
glog.Warningf("status: %v", err)
return false, nil
Expand Down Expand Up @@ -90,44 +126,21 @@ func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, c
return nil
}

// WaitForAPIServerProcess waits for api server to be healthy returns error if it doesn't
func WaitForAPIServerProcess(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, start time.Time, timeout time.Duration) error {
glog.Infof("waiting for apiserver process to appear ...")
err := wait.PollImmediate(time.Millisecond*500, timeout, func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during process check")
}

if time.Since(start) > minLogCheckTime {
announceProblems(r, bs, cfg, cr)
time.Sleep(kconst.APICallRetryInterval * 5)
}

if _, ierr := apiServerPID(cr); ierr != nil {
return false, nil
}

return true, nil
})
// APIServerVersionMatch checks if the server version matches the expected
func APIServerVersionMatch(client *kubernetes.Clientset, expected string) error {
vi, err := client.ServerVersion()
if err != nil {
return fmt.Errorf("apiserver process never appeared")
return errors.Wrap(err, "server version")
}
glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start))
return nil
}

// apiServerPID returns our best guess to the apiserver pid
func apiServerPID(cr command.Runner) (int, error) {
rr, err := cr.RunCmd(exec.Command("sudo", "pgrep", "-xnf", "kube-apiserver.*minikube.*"))
if err != nil {
return 0, err
glog.Infof("control plane version: %s", vi)
if version.CompareKubeAwareVersionStrings(vi.String(), expected) != 0 {
return fmt.Errorf("controlPane = %q, expected: %q", vi.String(), expected)
}
s := strings.TrimSpace(rr.Stdout.String())
return strconv.Atoi(s)
return nil
}

// APIServerStatus returns apiserver status in libmachine style state.State
func APIServerStatus(cr command.Runner, ip net.IP, port int) (state.State, error) {
func APIServerStatus(cr command.Runner, hostname string, port int) (state.State, error) {
glog.Infof("Checking apiserver status ...")

pid, err := apiServerPID(cr)
Expand All @@ -140,34 +153,34 @@ func APIServerStatus(cr command.Runner, ip net.IP, port int) (state.State, error
rr, err := cr.RunCmd(exec.Command("sudo", "egrep", "^[0-9]+:freezer:", fmt.Sprintf("/proc/%d/cgroup", pid)))
if err != nil {
glog.Warningf("unable to find freezer cgroup: %v", err)
return apiServerHealthz(ip, port)
return apiServerHealthz(hostname, port)

}
freezer := strings.TrimSpace(rr.Stdout.String())
glog.Infof("apiserver freezer: %q", freezer)
fparts := strings.Split(freezer, ":")
if len(fparts) != 3 {
glog.Warningf("unable to parse freezer - found %d parts: %s", len(fparts), freezer)
return apiServerHealthz(ip, port)
return apiServerHealthz(hostname, port)
}

rr, err = cr.RunCmd(exec.Command("sudo", "cat", path.Join("/sys/fs/cgroup/freezer", fparts[2], "freezer.state")))
if err != nil {
glog.Errorf("unable to get freezer state: %s", rr.Stderr.String())
return apiServerHealthz(ip, port)
return apiServerHealthz(hostname, port)
}

fs := strings.TrimSpace(rr.Stdout.String())
glog.Infof("freezer state: %q", fs)
if fs == "FREEZING" || fs == "FROZEN" {
return state.Paused, nil
}
return apiServerHealthz(ip, port)
return apiServerHealthz(hostname, port)
}

// apiServerHealthz hits the /healthz endpoint and returns libmachine style state.State
func apiServerHealthz(ip net.IP, port int) (state.State, error) {
url := fmt.Sprintf("https://%s/healthz", net.JoinHostPort(ip.String(), fmt.Sprint(port)))
func apiServerHealthz(hostname string, port int) (state.State, error) {
url := fmt.Sprintf("https://%s/healthz", net.JoinHostPort(hostname, fmt.Sprint(port)))
glog.Infof("Checking apiserver healthz at %s ...", url)
// To avoid: x509: certificate signed by unknown authority
tr := &http.Transport{
Expand All @@ -191,16 +204,3 @@ func apiServerHealthz(ip net.IP, port int) (state.State, error) {
}
return state.Running, nil
}

// APIServerVersionMatch asks the server behind client for its version and
// compares it with expected using version.CompareKubeAwareVersionStrings.
//
// It returns nil when the comparison reports equality, a wrapped error when the
// server version cannot be retrieved, and an error naming both versions when
// they differ.
func APIServerVersionMatch(client *kubernetes.Clientset, expected string) error {
	vi, err := client.ServerVersion()
	if err != nil {
		return errors.Wrap(err, "server version")
	}
	glog.Infof("control plane version: %s", vi)

	actual := vi.String()
	if version.CompareKubeAwareVersionStrings(actual, expected) != 0 {
		return fmt.Errorf("controlPlane = %q, expected: %q", actual, expected)
	}
	return nil
}
2 changes: 1 addition & 1 deletion pkg/minikube/bootstrapper/bsutil/kverify/default_sa.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func WaitForDefaultSA(cs *kubernetes.Clientset) error {
}
return fmt.Errorf("couldn't find default service account")
}
if err := retry.Expo(saReady, 500*time.Millisecond, 30*time.Second); err != nil {
if err := retry.Expo(saReady, 500*time.Millisecond, 60*time.Second); err != nil {
return errors.Wrapf(err, "waited %s for SA", time.Since(pStart))
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2019 The Kubernetes Authors All rights reserved.
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -26,6 +26,8 @@ import (
"github.com/docker/machine/libmachine/state"
"github.com/golang/glog"
core "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/minikube/pkg/minikube/bootstrapper"
"k8s.io/minikube/pkg/minikube/command"
Expand All @@ -34,31 +36,78 @@ import (
"k8s.io/minikube/pkg/minikube/logs"
)

// minLogCheckTime how long to wait before spamming error logs to console
const minLogCheckTime = 60 * time.Second

const (
// minLogCheckTime how long to wait before spamming error logs to console
minLogCheckTime = 30 * time.Second
// APIServerWait is the name used in the flags for k8s api server
APIServerWait = "apiserver"
// SystemPodsWait is the name used in the flags for pods in the kube system
SystemPodsWait = "system_pods"
// DefaultServiceAccountWait is the name used in the flags for default service account
DefaultServiceAccountWait = "default_sa"
// APIServerWaitKey is the name used in the flags for k8s api server
APIServerWaitKey = "apiserver"
// SystemPodsWaitKey is the name used in the flags for pods in the kube system
SystemPodsWaitKey = "system_pods"
// DefaultSAWaitKey is the name used in the flags for default service account
DefaultSAWaitKey = "default_sa"
)

// DefaultWaits is the map of the default components to wait for
var DefaultWaits = map[string]bool{APIServerWait: true, SystemPodsWait: true}
// vars related to the --wait flag
var (
// DefaultWaitComponents is the map of the default components to wait for
DefaultWaitComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
// NoWaitComponents is the map of components to wait for when 'none' or 'false' is specified
NoWaitComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false}
// AllWaitComponents is the map for waiting for all components.
AllWaitComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true}
// DefaultWaitList is the list of all default components to wait for
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
// AllValidWaitList is the list of all valid components to wait for
AllValidWaitList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey}
)

// DefaultWaitsKeys is list of all default components to wait for
var DefaultWaitsKeys = []string{APIServerWait, SystemPodsWait}
// ShouldWait reports whether wcs enables at least one of the apiserver,
// system pods or default service account wait components.
func ShouldWait(wcs map[string]bool) bool {
	for _, key := range []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey} {
		if wcs[key] {
			return true
		}
	}
	return false
}

// ExpectedComponentsRunning lists the pods in the "kube-system" namespace and
// checks that every expected component is represented by a pod in the Running
// phase. A pod counts for a component when the value of its "component" or
// "k8s-app" label equals the component name.
//
// It returns nil when nothing is missing, the list error when the pods cannot
// be listed, and an error naming the missing components otherwise.
func ExpectedComponentsRunning(cs *kubernetes.Clientset) error {
expected := []string{
"kube-dns", // coredns
"etcd",
"kube-apiserver",
"kube-controller-manager",
"kube-proxy",
"kube-scheduler",
}

// NoWaitsCompos is the map of components to wait for when 'none' or 'false' is specified
var NoWaitsCompos = map[string]bool{APIServerWait: false, SystemPodsWait: false, DefaultServiceAccountWait: false}
// found holds the label values seen on running pods
found := map[string]bool{}

// AllWaitsCompos is map for waiting for all components.
var AllWaitsCompos = map[string]bool{APIServerWait: true, SystemPodsWait: true, DefaultServiceAccountWait: true}
pods, err := cs.CoreV1().Pods("kube-system").List(meta.ListOptions{})
if err != nil {
return err
}

for _, pod := range pods.Items {
glog.Infof("found pod: %s", podStatusMsg(pod))
// pods that are not running do not count towards any component
if pod.Status.Phase != core.PodRunning {
continue
}
for k, v := range pod.ObjectMeta.Labels {
if k == "component" || k == "k8s-app" {
found[v] = true
}
}
}

// AllValidWaitsList is the list of all valid components to wait for
var AllValidWaitsList = []string{APIServerWait, SystemPodsWait, DefaultServiceAccountWait}
// collect every expected component that no running pod was labelled with
missing := []string{}
for _, e := range expected {
if !found[e] {
missing = append(missing, e)
}
}
if len(missing) > 0 {
return fmt.Errorf("missing components: %v", strings.Join(missing, ", "))
}
return nil
}

// podStatusMsg returns a human-readable pod status, for generating debug status
func podStatusMsg(pod core.Pod) string {
Expand All @@ -80,11 +129,6 @@ func podStatusMsg(pod core.Pod) string {
return sb.String()
}

// DontWait reports whether wcs enables none of the apiserver, system pods or
// default service account wait components.
func DontWait(wcs map[string]bool) bool {
	anyEnabled := wcs[APIServerWait] || wcs[SystemPodsWait] || wcs[DefaultServiceAccountWait]
	return !anyEnabled
}

// announceProblems checks for problems, and slows polling down if any are found
func announceProblems(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner) {
problems := logs.FindProblems(r, bs, cfg, cr)
Expand Down
Loading