Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--wait, wait for more components #7375

Merged
merged 20 commits into from
Apr 4, 2020
Prev Previous commit
Next Next commit
add wait logic to kubeadm
  • Loading branch information
medyagh committed Apr 3, 2020
commit 4c8f79d823281208be0e8666648352914e8edf71
4 changes: 2 additions & 2 deletions cmd/minikube/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func initMinikubeFlags() {
startCmd.Flags().String(criSocket, "", "The cri socket path to be used.")
startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.")
startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".")
startCmd.Flags().StringSlice(waitComponents, kverify.DefaultWaitsKeys, fmt.Sprintf("comma separated list of kuberentes components to wait to verify after start. defaults to %q, available options: %q . can also specify 'all' or 'none'", kverify.DefaultWaitsKeys, kverify.AllWaitsKeys))
startCmd.Flags().StringSlice(waitComponents, kverify.DefaultWaitsKeys, fmt.Sprintf("comma separated list of kuberentes components to wait to verify after start. defaults to %q, available options: %q . can also specify 'all' or 'none'", kverify.DefaultWaitsKeys, kverify.AllValidWaitsList))
startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.")
startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.")
startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.")
Expand Down Expand Up @@ -1218,7 +1218,7 @@ func interpretWaitFlag(cmd cobra.Command) map[string]bool {

// before minikube 1.9.0, wait flag was boolean
if (len(waitFlags) == 1 && waitFlags[0] == "true") || (len(waitFlags) == 1 && waitFlags[0] == "all") {
return kverify.AllWaitsCompo
return kverify.AllWaitsCompos
medyagh marked this conversation as resolved.
Show resolved Hide resolved
}

// respecting legacy flag format --wait=false
Expand Down
37 changes: 35 additions & 2 deletions pkg/minikube/bootstrapper/bsutil/kverify/api_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ import (

"github.com/docker/machine/libmachine/state"
"github.com/golang/glog"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/version"
"k8s.io/client-go/kubernetes"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/minikube/pkg/minikube/bootstrapper"
"k8s.io/minikube/pkg/minikube/command"
Expand All @@ -39,7 +42,7 @@ import (
)

// WaitForHealthyAPIServer waits for api server status to be running
func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, start time.Time, ip string, port int, timeout time.Duration) error {
func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, ip string, port int, timeout time.Duration) error {
glog.Infof("waiting for apiserver healthz status ...")
hStart := time.Now()

Expand Down Expand Up @@ -67,7 +70,23 @@ func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, c
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil {
return fmt.Errorf("apiserver healthz never reported healthy")
}
glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart))

vcheck := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during version check")
}
if err := APIServerVersionMatch(client, cfg.KubernetesConfig.KubernetesVersion); err != nil {
glog.Warningf("api server version match failed: %v", err)
return false, nil
}
return true, nil
}

if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, vcheck); err != nil {
return fmt.Errorf("controlPlane never updated to %s", cfg.KubernetesConfig.KubernetesVersion)
}

glog.Infof("duration metric: took %s to wait for apiserver health ...", time.Since(hStart))
return nil
}

Expand All @@ -87,6 +106,7 @@ func WaitForAPIServerProcess(r cruntime.Manager, bs bootstrapper.Bootstrapper, c
if _, ierr := apiServerPID(cr); ierr != nil {
return false, nil
}

return true, nil
})
if err != nil {
Expand Down Expand Up @@ -171,3 +191,16 @@ func apiServerHealthz(ip net.IP, port int) (state.State, error) {
}
return state.Running, nil
}

// APIServerVersionMatch checks if the server version matches the expected
func APIServerVersionMatch(client *kubernetes.Clientset, expected string) error {
vi, err := client.ServerVersion()
if err != nil {
return errors.Wrap(err, "server version")
}
glog.Infof("control plane version: %s", vi)
if version.CompareKubeAwareVersionStrings(vi.String(), expected) != 0 {
return fmt.Errorf("controlPane = %q, expected: %q", vi.String(), expected)
}
return nil
}
245 changes: 7 additions & 238 deletions pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
)

const (
// minLogCheckTime how long to wait before spamming error logs to console
minLogCheckTime = 30 * time.Second
medyagh marked this conversation as resolved.
Show resolved Hide resolved
// APIServerWait is the name used in the flags for k8s api server
APIServerWait = "apiserver"
// SystemPodsWait is the name used in the flags for pods in the kube system
Expand All @@ -52,15 +54,12 @@ var DefaultWaitsKeys = []string{APIServerWait, SystemPodsWait}
// NoWaitsCompos is map of componets to wait for if specified 'none' or 'false'
var NoWaitsCompos = map[string]bool{APIServerWait: false, SystemPodsWait: false, DefaultServiceAccountWait: false}
medyagh marked this conversation as resolved.
Show resolved Hide resolved

// AllWaitsCompo is map for waiting for all components.
var AllWaitsCompo = map[string]bool{APIServerWait: true, SystemPodsWait: true, DefaultServiceAccountWait: true}
// AllWaitsCompos is map for waiting for all components.
var AllWaitsCompos = map[string]bool{APIServerWait: true, SystemPodsWait: true, DefaultServiceAccountWait: true}

// AllValidWaitsList list of all valid components to wait for
var AllValidWaitsList = []string{APIServerWait, SystemPodsWait, DefaultServiceAccountWait}

// minLogCheckTime how long to wait before spamming error logs to console
const minLogCheckTime = 30 * time.Second

// podStatusMsg returns a human-readable pod status, for generating debug status
func podStatusMsg(pod core.Pod) string {
var sb strings.Builder
Expand All @@ -81,103 +80,9 @@ func podStatusMsg(pod core.Pod) string {
return sb.String()
}

// WaitForSystemPods verifies essential pods for running kurnetes is running
func WaitForSystemPods(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, timeout time.Duration) error {
glog.Info("waiting for kube-system pods to appear ...")
pStart := time.Now()

podList := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during pod check")
}
if time.Since(start) > minLogCheckTime {
announceProblems(r, bs, cfg, cr)
time.Sleep(kconst.APICallRetryInterval * 5)
}

// Wait for any system pod, as waiting for apiserver may block until etcd
pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
if err != nil {
glog.Warningf("pod list returned error: %v", err)
return false, nil
}
glog.Infof("%d kube-system pods found", len(pods.Items))
for _, pod := range pods.Items {
glog.Infof(podStatusMsg(pod))
}

if len(pods.Items) < 2 {
return false, nil
}
return true, nil
}
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil {
return fmt.Errorf("apiserver never returned a pod list")
}
glog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart))
return nil
}

// WaitForHealthyAPIServer waits for api server status to be running
func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, hostname string, port int, timeout time.Duration) error {
glog.Infof("waiting for apiserver healthz status ...")
hStart := time.Now()

healthz := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during healthz check")
}

if time.Since(start) > minLogCheckTime {
announceProblems(r, bs, cfg, cr)
time.Sleep(kconst.APICallRetryInterval * 5)
}

status, err := apiServerHealthz(hostname, port)
if err != nil {
glog.Warningf("status: %v", err)
return false, nil
}
if status != state.Running {
return false, nil
}
return true, nil
}

if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil {
return fmt.Errorf("apiserver healthz never reported healthy")
}

vcheck := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during version check")
}
if err := APIServerVersionMatch(client, cfg.KubernetesConfig.KubernetesVersion); err != nil {
glog.Warningf("api server version match failed: %v", err)
return false, nil
}
return true, nil
}

if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, vcheck); err != nil {
return fmt.Errorf("controlPlane never updated to %s", cfg.KubernetesConfig.KubernetesVersion)
}

glog.Infof("duration metric: took %s to wait for apiserver health ...", time.Since(hStart))
return nil
}

// APIServerVersionMatch checks if the server version matches the expected
func APIServerVersionMatch(client *kubernetes.Clientset, expected string) error {
vi, err := client.ServerVersion()
if err != nil {
return errors.Wrap(err, "server version")
}
glog.Infof("control plane version: %s", vi)
if version.CompareKubeAwareVersionStrings(vi.String(), expected) != 0 {
return fmt.Errorf("controlPane = %q, expected: %q", vi.String(), expected)
}
return nil
// DontWait will return true if the config is no need to wait
func DontWait(wcs map[string]bool) bool {
return wcs[APIServerWait] == false && wcs[SystemPodsWait] == false && wcs[DefaultServiceAccountWait] == false
}

// announceProblems checks for problems, and slows polling down if any are found
Expand All @@ -189,142 +94,6 @@ func announceProblems(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg conf
}
}

<<<<<<< HEAD
// APIServerStatus returns apiserver status in libmachine style state.State
func APIServerStatus(cr command.Runner, hostname string, port int) (state.State, error) {
glog.Infof("Checking apiserver status ...")

pid, err := apiServerPID(cr)
if err != nil {
glog.Warningf("stopped: unable to get apiserver pid: %v", err)
return state.Stopped, nil
}

// Get the freezer cgroup entry for this pid
rr, err := cr.RunCmd(exec.Command("sudo", "egrep", "^[0-9]+:freezer:", fmt.Sprintf("/proc/%d/cgroup", pid)))
if err != nil {
glog.Warningf("unable to find freezer cgroup: %v", err)
return apiServerHealthz(hostname, port)

}
freezer := strings.TrimSpace(rr.Stdout.String())
glog.Infof("apiserver freezer: %q", freezer)
fparts := strings.Split(freezer, ":")
if len(fparts) != 3 {
glog.Warningf("unable to parse freezer - found %d parts: %s", len(fparts), freezer)
return apiServerHealthz(hostname, port)
}

rr, err = cr.RunCmd(exec.Command("sudo", "cat", path.Join("/sys/fs/cgroup/freezer", fparts[2], "freezer.state")))
if err != nil {
glog.Errorf("unable to get freezer state: %s", rr.Stderr.String())
return apiServerHealthz(hostname, port)
}

fs := strings.TrimSpace(rr.Stdout.String())
glog.Infof("freezer state: %q", fs)
if fs == "FREEZING" || fs == "FROZEN" {
return state.Paused, nil
}
return apiServerHealthz(hostname, port)
}

// apiServerHealthz hits the /healthz endpoint and returns libmachine style state.State
func apiServerHealthz(hostname string, port int) (state.State, error) {
url := fmt.Sprintf("https://%s/healthz", net.JoinHostPort(hostname, fmt.Sprint(port)))
glog.Infof("Checking apiserver healthz at %s ...", url)
// To avoid: x509: certificate signed by unknown authority
tr := &http.Transport{
Proxy: nil, // To avoid connectiv issue if http(s)_proxy is set.
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
client := &http.Client{Transport: tr}
resp, err := client.Get(url)
// Connection refused, usually.
if err != nil {
glog.Infof("stopped: %s: %v", url, err)
return state.Stopped, nil
}
if resp.StatusCode == http.StatusUnauthorized {
glog.Errorf("%s returned code %d (unauthorized). Please ensure that your apiserver authorization settings make sense!", url, resp.StatusCode)
return state.Error, nil
}
if resp.StatusCode != http.StatusOK {
glog.Warningf("%s response: %v %+v", url, err, resp)
return state.Error, nil
}
return state.Running, nil
}

||||||| merged common ancestors
// APIServerStatus returns apiserver status in libmachine style state.State
func APIServerStatus(cr command.Runner, ip net.IP, port int) (state.State, error) {
glog.Infof("Checking apiserver status ...")

pid, err := apiServerPID(cr)
if err != nil {
glog.Warningf("stopped: unable to get apiserver pid: %v", err)
return state.Stopped, nil
}

// Get the freezer cgroup entry for this pid
rr, err := cr.RunCmd(exec.Command("sudo", "egrep", "^[0-9]+:freezer:", fmt.Sprintf("/proc/%d/cgroup", pid)))
if err != nil {
glog.Warningf("unable to find freezer cgroup: %v", err)
return apiServerHealthz(ip, port)

}
freezer := strings.TrimSpace(rr.Stdout.String())
glog.Infof("apiserver freezer: %q", freezer)
fparts := strings.Split(freezer, ":")
if len(fparts) != 3 {
glog.Warningf("unable to parse freezer - found %d parts: %s", len(fparts), freezer)
return apiServerHealthz(ip, port)
}

rr, err = cr.RunCmd(exec.Command("sudo", "cat", path.Join("/sys/fs/cgroup/freezer", fparts[2], "freezer.state")))
if err != nil {
glog.Errorf("unable to get freezer state: %s", rr.Stderr.String())
return apiServerHealthz(ip, port)
}

fs := strings.TrimSpace(rr.Stdout.String())
glog.Infof("freezer state: %q", fs)
if fs == "FREEZING" || fs == "FROZEN" {
return state.Paused, nil
}
return apiServerHealthz(ip, port)
}

// apiServerHealthz hits the /healthz endpoint and returns libmachine style state.State
func apiServerHealthz(ip net.IP, port int) (state.State, error) {
url := fmt.Sprintf("https://%s/healthz", net.JoinHostPort(ip.String(), fmt.Sprint(port)))
glog.Infof("Checking apiserver healthz at %s ...", url)
// To avoid: x509: certificate signed by unknown authority
tr := &http.Transport{
Proxy: nil, // To avoid connectiv issue if http(s)_proxy is set.
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
client := &http.Client{Transport: tr}
resp, err := client.Get(url)
// Connection refused, usually.
if err != nil {
glog.Infof("stopped: %s: %v", url, err)
return state.Stopped, nil
}
if resp.StatusCode == http.StatusUnauthorized {
glog.Errorf("%s returned code %d (unauthorized). Please ensure that your apiserver authorization settings make sense!", url, resp.StatusCode)
return state.Error, nil
}
if resp.StatusCode != http.StatusOK {
glog.Warningf("%s response: %v %+v", url, err, resp)
return state.Error, nil
}
return state.Running, nil
}

=======
>>>>>>> break down kverify package to files
// KubeletStatus checks the kubelet status
func KubeletStatus(cr command.Runner) (state.State, error) {
glog.Infof("Checking kubelet status ...")
Expand Down
8 changes: 6 additions & 2 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
glog.Infof("%s is not a control plane, nothing to wait for", n.Name)
return nil
}
if cfg.WaitForCompos == kverify.NoWaitsCompos {
if kverify.DontWait(cfg.WaitForCompos) {
glog.Infof("skip waiting for components based on config.")
return nil
}
Expand All @@ -361,11 +361,15 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
}

if cfg.WaitForCompos[kverify.APIServerWait] {
client, err := k.client(ip, port)
if err != nil {
return errors.Wrap(err, "get k8s client")
}
if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, start, timeout); err != nil {
return errors.Wrap(err, "wait for apiserver proc")
}

if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, start, ip, port, timeout); err != nil {
if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, start, ip, port, timeout); err != nil {
return errors.Wrap(err, "wait for healthy API server")
}
}
Expand Down