From de35c8f2af6e1d1a3872d8577522853a6b2c1667 Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Fri, 27 Mar 2015 13:12:48 -0700 Subject: [PATCH] Record event when image GC fails. --- pkg/kubelet/image_manager.go | 22 +++++++++++++++----- pkg/kubelet/image_manager_test.go | 2 ++ pkg/kubelet/kubelet.go | 34 ++++++++++++++++--------------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/pkg/kubelet/image_manager.go b/pkg/kubelet/image_manager.go index 6b46647765eed..a62cc0a552969 100644 --- a/pkg/kubelet/image_manager.go +++ b/pkg/kubelet/image_manager.go @@ -22,6 +22,8 @@ import ( "sync" "time" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor" "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" @@ -65,6 +67,12 @@ type realImageManager struct { // cAdvisor instance. cadvisor cadvisor.Interface + + // Recorder for Kubernetes events. + recorder record.EventRecorder + + // Reference to this node. + nodeRef *api.ObjectReference } // Information about the images we track. @@ -79,7 +87,7 @@ type imageRecord struct { size int64 } -func newImageManager(dockerClient dockertools.DockerInterface, cadvisorInterface cadvisor.Interface, policy ImageGCPolicy) (imageManager, error) { +func newImageManager(dockerClient dockertools.DockerInterface, cadvisorInterface cadvisor.Interface, recorder record.EventRecorder, nodeRef *api.ObjectReference, policy ImageGCPolicy) (imageManager, error) { // Validate policy. 
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 { return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent) @@ -92,6 +100,8 @@ func newImageManager(dockerClient dockertools.DockerInterface, cadvisorInterface policy: policy, imageRecords: make(map[string]*imageRecord), cadvisor: cadvisorInterface, + recorder: recorder, + nodeRef: nodeRef, } err := im.start() @@ -182,8 +192,9 @@ func (self *realImageManager) GarbageCollect() error { // Check valid capacity. if capacity == 0 { - // TODO(vmarmol): Surface event. - return fmt.Errorf("invalid capacity %d on device %q at mount point %q", capacity, fsInfo.Device, fsInfo.Mountpoint) + err := fmt.Errorf("invalid capacity %d on device %q at mount point %q", capacity, fsInfo.Device, fsInfo.Mountpoint) + self.recorder.Eventf(self.nodeRef, "invalidDiskCapacity", "%s", err.Error()) + return err } // If over the max threshold, free enough to place us at the lower threshold. @@ -197,8 +208,9 @@ func (self *realImageManager) GarbageCollect() error { } if freed < amountToFree { - // TODO(vmarmol): Surface event. - return fmt.Errorf("failed to garbage collect required amount of images. 
Wanted to free %d, but freed %d", amountToFree, freed) + self.recorder.Eventf(self.nodeRef, "freeDiskSpaceFailed", "%s", err.Error()) + return err } } diff --git a/pkg/kubelet/image_manager_test.go b/pkg/kubelet/image_manager_test.go index 48fa04bae93f3..26e7d42441881 100644 --- a/pkg/kubelet/image_manager_test.go +++ b/pkg/kubelet/image_manager_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor" "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" @@ -42,6 +43,7 @@ func newRealImageManager(policy ImageGCPolicy) (*realImageManager, *dockertools. policy: policy, imageRecords: make(map[string]*imageRecord), cadvisor: mockCadvisor, + recorder: &record.FakeRecorder{}, }, fakeDocker, mockCadvisor } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index fe8feafa64b0f..74fd9da89f346 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -195,11 +195,21 @@ func NewMainKubelet( } nodeLister := &cache.StoreToNodeLister{nodeStore} + // TODO: get the real minion object of ourself, + // and use the real minion name and UID. + // TODO: what is namespace for node? 
+ nodeRef := &api.ObjectReference{ + Kind: "Node", + Name: hostname, + UID: types.UID(hostname), + Namespace: "", + } + containerGC, err := newContainerGC(dockerClient, containerGCPolicy) if err != nil { return nil, err } - imageManager, err := newImageManager(dockerClient, cadvisorInterface, imageGCPolicy) + imageManager, err := newImageManager(dockerClient, cadvisorInterface, recorder, nodeRef, imageGCPolicy) if err != nil { return nil, fmt.Errorf("failed to initialize image manager: %v", err) } @@ -232,6 +242,7 @@ func NewMainKubelet( imageManager: imageManager, statusManager: statusManager, cloud: cloud, + nodeRef: nodeRef, } klet.podManager = newBasicPodManager(klet.kubeClient) @@ -350,6 +361,9 @@ type Kubelet struct { //Cloud provider interface cloud cloudprovider.Interface + + // Reference to this node. + nodeRef *api.ObjectReference } // getRootDir returns the full path to the directory under which kubelet can @@ -1737,7 +1751,7 @@ func (kl *Kubelet) updateNodeStatus() error { func (kl *Kubelet) recordNodeOnlineEvent() { // TODO: This requires a transaction, either both node status is updated // and event is recorded or neither should happen, see issue #6055. - kl.recorder.Eventf(kl.getNodeReference(), "online", "Node %s is now online", kl.hostname) + kl.recorder.Eventf(kl.nodeRef, "online", "Node %s is now online", kl.hostname) } // tryUpdateNodeStatus tries to update node status to master. @@ -1763,7 +1777,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { node.Status.NodeInfo.BootID != info.BootID { // TODO: This requires a transaction, either both node status is updated // and event is recorded or neither should happen, see issue #6055. 
- kl.recorder.Eventf(kl.getNodeReference(), "rebooted", + kl.recorder.Eventf(kl.nodeRef, "rebooted", "Node %s has been rebooted, boot id: %s", kl.hostname, info.BootID) } node.Status.NodeInfo.BootID = info.BootID @@ -2013,22 +2027,10 @@ func (kl *Kubelet) PortForward(podFullName string, uid types.UID, port uint16, s return kl.runner.PortForward(podInfraContainer.ID, port, stream) } -func (kl *Kubelet) getNodeReference() *api.ObjectReference { - // and use the real minion name and UID. - // TODO: what is namespace for node? - return &api.ObjectReference{ - Kind: "Node", - Name: kl.hostname, - UID: types.UID(kl.hostname), - Namespace: "", - } -} - // BirthCry sends an event that the kubelet has started up. func (kl *Kubelet) BirthCry() { // Make an event that kubelet restarted. - // TODO: get the real minion object of ourself, - kl.recorder.Eventf(kl.getNodeReference(), "starting", "Starting kubelet.") + kl.recorder.Eventf(kl.nodeRef, "starting", "Starting kubelet.") } func (kl *Kubelet) StreamingConnectionIdleTimeout() time.Duration {