Skip to content

Commit

Permalink
Record event when image GC fails.
Browse files Browse the repository at this point in the history
  • Loading branch information
vmarmol committed Mar 30, 2015
1 parent 31324a0 commit de35c8f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 21 deletions.
22 changes: 17 additions & 5 deletions pkg/kubelet/image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"sync"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
Expand Down Expand Up @@ -65,6 +67,12 @@ type realImageManager struct {

// cAdvisor instance.
cadvisor cadvisor.Interface

// Recorder for Kubernetes events.
recorder record.EventRecorder

// Reference to this node.
nodeRef *api.ObjectReference
}

// Information about the images we track.
Expand All @@ -79,7 +87,7 @@ type imageRecord struct {
size int64
}

func newImageManager(dockerClient dockertools.DockerInterface, cadvisorInterface cadvisor.Interface, policy ImageGCPolicy) (imageManager, error) {
func newImageManager(dockerClient dockertools.DockerInterface, cadvisorInterface cadvisor.Interface, recorder record.EventRecorder, nodeRef *api.ObjectReference, policy ImageGCPolicy) (imageManager, error) {
// Validate policy.
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
Expand All @@ -92,6 +100,8 @@ func newImageManager(dockerClient dockertools.DockerInterface, cadvisorInterface
policy: policy,
imageRecords: make(map[string]*imageRecord),
cadvisor: cadvisorInterface,
recorder: recorder,
nodeRef: nodeRef,
}

err := im.start()
Expand Down Expand Up @@ -182,8 +192,9 @@ func (self *realImageManager) GarbageCollect() error {

// Check valid capacity.
if capacity == 0 {
// TODO(vmarmol): Surface event.
return fmt.Errorf("invalid capacity %d on device %q at mount point %q", capacity, fsInfo.Device, fsInfo.Mountpoint)
err := fmt.Errorf("invalid capacity %d on device %q at mount point %q", capacity, fsInfo.Device, fsInfo.Mountpoint)
self.recorder.Eventf(self.nodeRef, "invalidDiskCapacity", err.Error())
return err
}

// If over the max threshold, free enough to place us at the lower threshold.
Expand All @@ -197,8 +208,9 @@ func (self *realImageManager) GarbageCollect() error {
}

if freed < amountToFree {
// TODO(vmarmol): Surface event.
return fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d, but freed %d", amountToFree, freed)
err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d, but freed %d", amountToFree, freed)
self.recorder.Eventf(self.nodeRef, "freeDiskSpaceFailed", err.Error())
return err
}
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"testing"
"time"

"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
Expand All @@ -42,6 +43,7 @@ func newRealImageManager(policy ImageGCPolicy) (*realImageManager, *dockertools.
policy: policy,
imageRecords: make(map[string]*imageRecord),
cadvisor: mockCadvisor,
recorder: &record.FakeRecorder{},
}, fakeDocker, mockCadvisor
}

Expand Down
34 changes: 18 additions & 16 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,21 @@ func NewMainKubelet(
}
nodeLister := &cache.StoreToNodeLister{nodeStore}

// TODO: get the real minion object for this kubelet,
// and use the real minion name and UID.
// TODO: what is the namespace for a node?
nodeRef := &api.ObjectReference{
Kind: "Node",
Name: hostname,
UID: types.UID(hostname),
Namespace: "",
}

containerGC, err := newContainerGC(dockerClient, containerGCPolicy)
if err != nil {
return nil, err
}
imageManager, err := newImageManager(dockerClient, cadvisorInterface, imageGCPolicy)
imageManager, err := newImageManager(dockerClient, cadvisorInterface, recorder, nodeRef, imageGCPolicy)
if err != nil {
return nil, fmt.Errorf("failed to initialize image manager: %v", err)
}
Expand Down Expand Up @@ -232,6 +242,7 @@ func NewMainKubelet(
imageManager: imageManager,
statusManager: statusManager,
cloud: cloud,
nodeRef: nodeRef,
}

klet.podManager = newBasicPodManager(klet.kubeClient)
Expand Down Expand Up @@ -350,6 +361,9 @@ type Kubelet struct {

//Cloud provider interface
cloud cloudprovider.Interface

// Reference to this node.
nodeRef *api.ObjectReference
}

// getRootDir returns the full path to the directory under which kubelet can
Expand Down Expand Up @@ -1737,7 +1751,7 @@ func (kl *Kubelet) updateNodeStatus() error {
// recordNodeOnlineEvent records an "online" event against this node, with a
// message that names kl.hostname.
func (kl *Kubelet) recordNodeOnlineEvent() {
// TODO: This requires a transaction: either both the node status is updated
// and the event is recorded, or neither should happen; see issue #6055.
// NOTE(review): the two calls below look like the before/after lines of a
// diff hunk (kl.getNodeReference() replaced by the stored kl.nodeRef) —
// confirm that only one of them belongs in the file.
kl.recorder.Eventf(kl.getNodeReference(), "online", "Node %s is now online", kl.hostname)
kl.recorder.Eventf(kl.nodeRef, "online", "Node %s is now online", kl.hostname)
}

// tryUpdateNodeStatus tries to update node status to master.
Expand All @@ -1763,7 +1777,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
node.Status.NodeInfo.BootID != info.BootID {
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
kl.recorder.Eventf(kl.getNodeReference(), "rebooted",
kl.recorder.Eventf(kl.nodeRef, "rebooted",
"Node %s has been rebooted, boot id: %s", kl.hostname, info.BootID)
}
node.Status.NodeInfo.BootID = info.BootID
Expand Down Expand Up @@ -2013,22 +2027,10 @@ func (kl *Kubelet) PortForward(podFullName string, uid types.UID, port uint16, s
return kl.runner.PortForward(podInfraContainer.ID, port, stream)
}

// getNodeReference builds a fresh ObjectReference describing this kubelet's
// node, suitable for passing to the event recorder. The hostname currently
// stands in for both the node name and its UID.
func (kl *Kubelet) getNodeReference() *api.ObjectReference {
	// TODO: get the real minion object for this kubelet,
	// and use the real minion name and UID.
	// TODO: what is namespace for node?
	ref := new(api.ObjectReference)
	ref.Kind = "Node"
	ref.Name = kl.hostname
	ref.UID = types.UID(kl.hostname)
	ref.Namespace = ""
	return ref
}

// BirthCry sends a "starting" event, with the message "Starting kubelet.",
// to announce that the kubelet has started up.
func (kl *Kubelet) BirthCry() {
// Make an event that kubelet restarted.
// TODO: get the real minion object for this kubelet and use it as the
// event's subject.
// NOTE(review): the two calls below look like the before/after lines of a
// diff hunk (kl.getNodeReference() replaced by the stored kl.nodeRef) —
// confirm that only one of them belongs in the file.
kl.recorder.Eventf(kl.getNodeReference(), "starting", "Starting kubelet.")
kl.recorder.Eventf(kl.nodeRef, "starting", "Starting kubelet.")
}

func (kl *Kubelet) StreamingConnectionIdleTimeout() time.Duration {
Expand Down

0 comments on commit de35c8f

Please sign in to comment.