Skip to content

Commit

Permalink
Handle cAdvisor partial failures
Browse files Browse the repository at this point in the history
  • Loading branch information
tallclair committed May 20, 2016
1 parent 1cce156 commit b05b419
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 19 deletions.
4 changes: 2 additions & 2 deletions pkg/kubelet/cadvisor/cadvisor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,15 +148,15 @@ func (cc *cadvisorClient) VersionInfo() (*cadvisorapi.VersionInfo, error) {

// SubcontainerInfo calls cc.SubcontainersInfo for the named container and
// re-indexes the returned infos into a map keyed by each info's Name.
//
// NOTE(review): this block is a diff hunk whose +/- markers were stripped, so
// two statements appear in both their pre-change and post-change forms. The
// file header reports "2 additions & 2 deletions"; presumably the first line
// of each pair is the removed one and the second is the added one -- confirm
// against the commit.
func (cc *cadvisorClient) SubcontainerInfo(name string, req *cadvisorapi.ContainerInfoRequest) (map[string]*cadvisorapi.ContainerInfo, error) {
infos, err := cc.SubcontainersInfo(name, req)
// Guard, first form: any error aborts the call.
if err != nil {
// Guard, second form: an error aborts only when no infos came back at all,
// so an error accompanied by some infos falls through to the map build.
if err != nil && len(infos) == 0 {
return nil, err
}

// Pre-size the map to the number of infos and index each one by its Name.
result := make(map[string]*cadvisorapi.ContainerInfo, len(infos))
for _, info := range infos {
result[info.Name] = info
}
// Return, first form: the error is always reported as nil here.
return result, nil
// Return, second form: whatever err SubcontainersInfo produced is passed
// through alongside the (possibly partial) result map.
return result, err
}

func (cc *cadvisorClient) MachineInfo() (*cadvisorapi.MachineInfo, error) {
Expand Down
30 changes: 18 additions & 12 deletions pkg/kubelet/server/stats/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ import (
"path"
"time"

"github.com/emicklei/go-restful"
"github.com/golang/glog"
cadvisorapi "github.com/google/cadvisor/info/v1"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"

"github.com/emicklei/go-restful"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
Expand Down Expand Up @@ -133,14 +133,14 @@ func parseStatsRequest(request *restful.Request) (StatsRequest, error) {
func (h *handler) handleStats(request *restful.Request, response *restful.Response) {
query, err := parseStatsRequest(request)
if err != nil {
handleError(response, err)
handleError(response, "/stats", err)
return
}

// Root container stats.
statsMap, err := h.provider.GetRawContainerInfo("/", query.cadvisorRequest(), false)
if err != nil {
handleError(response, err)
handleError(response, fmt.Sprintf("/stats %v", query), err)
return
}
writeResponse(response, statsMap["/"])
Expand All @@ -150,7 +150,7 @@ func (h *handler) handleStats(request *restful.Request, response *restful.Respon
func (h *handler) handleSummary(request *restful.Request, response *restful.Response) {
summary, err := h.summaryProvider.Get()
if err != nil {
handleError(response, err)
handleError(response, "/stats/summary", err)
} else {
writeResponse(response, summary)
}
Expand All @@ -160,7 +160,7 @@ func (h *handler) handleSummary(request *restful.Request, response *restful.Resp
func (h *handler) handleSystemContainer(request *restful.Request, response *restful.Response) {
query, err := parseStatsRequest(request)
if err != nil {
handleError(response, err)
handleError(response, "/stats/container", err)
return
}

Expand All @@ -169,8 +169,13 @@ func (h *handler) handleSystemContainer(request *restful.Request, response *rest
stats, err := h.provider.GetRawContainerInfo(
containerName, query.cadvisorRequest(), query.Subcontainers)
if err != nil {
handleError(response, err)
return
if _, ok := stats[containerName]; ok {
// If the failure is partial, log it and return a best-effort response.
glog.Errorf("Partial failure issuing GetRawContainerInfo(%v): %v", query, err)
} else {
handleError(response, fmt.Sprintf("/stats/container %v", query), err)
return
}
}
writeResponse(response, stats)
}
Expand All @@ -181,7 +186,7 @@ func (h *handler) handleSystemContainer(request *restful.Request, response *rest
func (h *handler) handlePodContainer(request *restful.Request, response *restful.Response) {
query, err := parseStatsRequest(request)
if err != nil {
handleError(response, err)
handleError(response, request.Request.URL.String(), err)
return
}

Expand All @@ -203,7 +208,7 @@ func (h *handler) handlePodContainer(request *restful.Request, response *restful
pod, ok := h.provider.GetPodByName(params["namespace"], params["podName"])
if !ok {
glog.V(4).Infof("Container not found: %v", params)
handleError(response, kubecontainer.ErrContainerNotFound)
response.WriteError(http.StatusNotFound, kubecontainer.ErrContainerNotFound)
return
}
stats, err := h.provider.GetContainerInfo(
Expand All @@ -213,7 +218,7 @@ func (h *handler) handlePodContainer(request *restful.Request, response *restful
query.cadvisorRequest())

if err != nil {
handleError(response, err)
handleError(response, fmt.Sprintf("%s %v", request.Request.URL.String(), query), err)
return
}
writeResponse(response, stats)
Expand All @@ -226,13 +231,14 @@ func writeResponse(response *restful.Response, stats interface{}) {
}

// handleError serializes an error object into an HTTP response.
//
// NOTE(review): this block is a diff hunk whose +/- markers were stripped, so
// the signature and the log statement each appear twice. Presumably the first
// of each pair is the removed line and the second the added one -- confirm
// against the commit.
func handleError(response *restful.Response, err error) {
// request is provided for logging.
func handleError(response *restful.Response, request string, err error) {
// The switch compares err by identity against the sentinel value.
switch err {
case kubecontainer.ErrContainerNotFound:
// A missing container is written as 404 and is not logged here.
response.WriteError(http.StatusNotFound, err)
default:
// Every other error is logged and written as a 500 whose body is msg.
msg := fmt.Sprintf("Internal Error: %v", err)
// Log line, first form: message only.
glog.Errorf("HTTP InternalServerError: %s", msg)
// Log line, second form: also includes the request string.
glog.Errorf("HTTP InternalServerError serving %s: %s", request, msg)
response.WriteErrorString(http.StatusInternalServerError, msg)
}
}
17 changes: 12 additions & 5 deletions pkg/kubelet/server/stats/summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,26 +68,33 @@ func (sp *summaryProviderImpl) Get() (*stats.Summary, error) {
}
infos, err := sp.provider.GetContainerInfoV2("/", options)
if err != nil {
return nil, err
if _, ok := infos["/"]; ok {
// If the failure is partial, log it and return a best-effort response.
glog.Errorf("Partial failure issuing GetContainerInfoV2: %v", err)
} else {
return nil, fmt.Errorf("failed GetContainerInfoV2: %v", err)
}
}

// TODO(timstclair): Consider returning a best-effort response if any of the following errors
// occur.
node, err := sp.provider.GetNode()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed GetNode: %v", err)
}

nodeConfig := sp.provider.GetNodeConfig()
rootFsInfo, err := sp.provider.RootFsInfo()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed RootFsInfo: %v", err)
}
imageFsInfo, err := sp.provider.DockerImagesFsInfo()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed DockerImagesFsInfo: %v", err)
}
imageStats, err := sp.runtime.ImageStats()
if err != nil || imageStats == nil {
return nil, err
return nil, fmt.Errorf("failed ImageStats: %v", err)
}
sb := &summaryBuilder{sp.fsResourceAnalyzer, node, nodeConfig, rootFsInfo, imageFsInfo, *imageStats, infos}
return sb.build()
Expand Down

0 comments on commit b05b419

Please sign in to comment.