Skip to content

Commit

Permalink
Gracefully delete pods from the Kubelet
Browse files Browse the repository at this point in the history
This commit wires together the graceful delete option for pods
on the Kubelet.  When a pod is deleted on the API server, a
grace period is calculated that is based on the
Pod.Spec.TerminationGracePeriodInSeconds, the user's provided grace
period, or a default.  The grace period can only shrink once set.
The value provided by the user (or the default) is set onto metadata
as DeletionGracePeriod.

When the Kubelet sees a pod with DeletionTimestamp set, it uses the
value of ObjectMeta.GracePeriodSeconds as the grace period
sent to Docker.  When updating status, if the pod has DeletionTimestamp
set and all containers are terminated, the Kubelet will update the
status one last time and then invoke Delete(pod, grace: 0) to
clean up the pod immediately.
  • Loading branch information
smarterclayton committed Jun 1, 2015
1 parent da7c612 commit 72ee028
Show file tree
Hide file tree
Showing 43 changed files with 592 additions and 174 deletions.
6 changes: 6 additions & 0 deletions pkg/api/deep_copy_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/rest/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje
} else {
objectMeta.Namespace = api.NamespaceNone
}
objectMeta.DeletionTimestamp = nil
objectMeta.DeletionGracePeriodSeconds = nil
strategy.PrepareForCreate(obj)
api.FillObjectMetaSystemFields(ctx, objectMeta)
api.GenerateName(strategy, objectMeta)
Expand Down
27 changes: 26 additions & 1 deletion pkg/api/rest/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,37 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje
if strategy == nil {
return false, false, nil
}
_, _, kerr := objectMetaAndKind(strategy, obj)
objectMeta, _, kerr := objectMetaAndKind(strategy, obj)
if kerr != nil {
return false, false, kerr
}

// if the object is already being deleted
if objectMeta.DeletionTimestamp != nil {
// if we are already being deleted, we may only shorten the deletion grace period
// this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set,
// so we force deletion immediately
if objectMeta.DeletionGracePeriodSeconds == nil {
return false, false, nil
}
// only a shorter grace period may be provided by a user
if options.GracePeriodSeconds != nil {
period := int64(*options.GracePeriodSeconds)
if period > *objectMeta.DeletionGracePeriodSeconds {
return false, true, nil
}
objectMeta.DeletionGracePeriodSeconds = &period
options.GracePeriodSeconds = &period
return true, false, nil
}
// graceful deletion is pending, do nothing
options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds
return false, true, nil
}

if !strategy.CheckGracefulDelete(obj, options) {
return false, false, nil
}
objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds
return true, false, nil
}
93 changes: 91 additions & 2 deletions pkg/api/rest/resttest/resttest.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) {

func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0)
t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn)
}

func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
Expand Down Expand Up @@ -362,12 +364,99 @@ func (t *Tester) TestDeleteGracefulHasDefault(existing runtime.Object, expectedG
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil {
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
return
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
return
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}

func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}

ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}

func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}

ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
// second delete duration is ignored
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}

func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) {
Expand All @@ -382,6 +471,6 @@ func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should exist: %v", err)
t.Errorf("unexpected error, object should not exist: %v", err)
}
}
3 changes: 3 additions & 0 deletions pkg/api/serialization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,16 @@ func TestRoundTripTypes(t *testing.T) {
}

func TestEncode_Ptr(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: map[string]string{"name": "foo"},
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,

TerminationGracePeriodSeconds: &grace,
},
}
obj := runtime.Object(pod)
Expand Down
9 changes: 9 additions & 0 deletions pkg/api/testing/fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer {
j.LabelSelector, _ = labels.Parse("a=b")
j.FieldSelector, _ = fields.ParseSelector("a=b")
},
func(j *api.PodSpec, c fuzz.Continue) {
c.FuzzNoCustom(j)
// has a default value
ttl := int64(30)
if c.RandBool() {
ttl = int64(c.Uint32())
}
j.TerminationGracePeriodSeconds = &ttl
},
func(j *api.PodPhase, c fuzz.Continue) {
statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown}
*j = statuses[c.Rand.Intn(len(statuses))]
Expand Down
4 changes: 4 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"`

// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"`

// Labels are key value pairs that may be used to scope and select individual resources.
// Label keys are of the form:
// label-key ::= prefixed-name | name
Expand Down
12 changes: 12 additions & 0 deletions pkg/api/v1/conversion_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down Expand Up @@ -3362,6 +3368,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down
6 changes: 6 additions & 0 deletions pkg/api/v1/deep_copy_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -933,6 +933,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down
4 changes: 4 additions & 0 deletions pkg/api/v1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ func addDefaultingFuncs() {
if obj.HostNetwork {
defaultHostNetworkPorts(&obj.Containers)
}
if obj.TerminationGracePeriodSeconds == nil {
period := int64(DefaultTerminationGracePeriodSeconds)
obj.TerminationGracePeriodSeconds = &period
}
},
func(obj *Probe) {
if obj.TimeoutSeconds == 0 {
Expand Down
8 changes: 7 additions & 1 deletion pkg/api/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"`

// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"`

// Labels are key value pairs that may be used to scope and select individual resources.
// TODO: replace map[string]string with labels.LabelSet type
Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"`
Expand Down Expand Up @@ -838,6 +842,8 @@ const (
// DNSDefault indicates that the pod should use the default (as
// determined by kubelet) DNS settings.
DNSDefault DNSPolicy = "Default"

DefaultTerminationGracePeriodSeconds = 30
)

// PodSpec is a description of a pod
Expand All @@ -852,7 +858,7 @@ type PodSpec struct {
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"`
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer`
// Optional: Set DNS policy. Defaults to "ClusterFirst"
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"`
Expand Down
12 changes: 12 additions & 0 deletions pkg/api/v1beta3/conversion_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,12 @@ func convert_api_ObjectMeta_To_v1beta3_ObjectMeta(in *api.ObjectMeta, out *Objec
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down Expand Up @@ -3034,6 +3040,12 @@ func convert_v1beta3_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.Objec
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down
6 changes: 6 additions & 0 deletions pkg/api/v1beta3/deep_copy_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,12 @@ func deepCopy_v1beta3_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.C
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
Expand Down
4 changes: 4 additions & 0 deletions pkg/api/v1beta3/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ func addDefaultingFuncs() {
if obj.HostNetwork {
defaultHostNetworkPorts(&obj.Containers)
}
if obj.TerminationGracePeriodSeconds == nil {
period := int64(DefaultTerminationGracePeriodSeconds)
obj.TerminationGracePeriodSeconds = &period
}
},
func(obj *Probe) {
if obj.TimeoutSeconds == 0 {
Expand Down
8 changes: 7 additions & 1 deletion pkg/api/v1beta3/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested"`

// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"`

// Labels are key value pairs that may be used to scope and select individual resources.
// TODO: replace map[string]string with labels.LabelSet type
Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services"`
Expand Down Expand Up @@ -842,6 +846,8 @@ const (
// DNSDefault indicates that the pod should use the default (as
// determined by kubelet) DNS settings.
DNSDefault DNSPolicy = "Default"

DefaultTerminationGracePeriodSeconds = 30
)

// PodSpec is a description of a pod
Expand All @@ -856,7 +862,7 @@ type PodSpec struct {
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"`
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer`
// Optional: Set DNS policy. Defaults to "ClusterFirst"
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"`
Expand Down
10 changes: 10 additions & 0 deletions pkg/api/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,16 @@ func ValidateObjectMetaUpdate(old, meta *api.ObjectMeta) errs.ValidationErrorLis
} else {
meta.CreationTimestamp = old.CreationTimestamp
}
// an object can never remove a deletion timestamp or clear/change grace period seconds
if !old.DeletionTimestamp.IsZero() {
meta.DeletionTimestamp = old.DeletionTimestamp
}
if old.DeletionGracePeriodSeconds != nil && meta.DeletionGracePeriodSeconds == nil {
meta.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds
}
if meta.DeletionGracePeriodSeconds != nil && *meta.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds {
allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", meta.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion"))
}

// Reject updates that don't specify a resource version
if meta.ResourceVersion == "" {
Expand Down
Loading

0 comments on commit 72ee028

Please sign in to comment.