Skip to content

Commit 9485934

Browse files
committed
feature(descheduler): add grace_period_seconds for DeschedulerPolicy
1 parent 29ff28c commit 9485934

File tree

9 files changed

+73
-6
lines changed

9 files changed

+73
-6
lines changed

pkg/api/types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ type DeschedulerPolicy struct {
4848

4949
// MetricsCollector configures collection of metrics about actual resource utilization
5050
MetricsCollector MetricsCollector
51+
52+
// GracePeriodSeconds The duration in seconds before the object should be deleted. Value must be non-negative integer.
53+
// The value zero indicates delete immediately. If this value is nil, the default grace period for the
54+
// specified type will be used.
55+
// Defaults to a per object value if not specified. zero means delete immediately.
56+
GracePeriodSeconds *int64
5157
}
5258

5359
// Namespaces carries a list of included/excluded namespaces

pkg/api/v1alpha2/types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ type DeschedulerPolicy struct {
4747

4848
// MetricsCollector configures collection of metrics for actual resource utilization
4949
MetricsCollector MetricsCollector `json:"metricsCollector,omitempty"`
50+
51+
// GracePeriodSeconds The duration in seconds before the object should be deleted. Value must be non-negative integer.
52+
// The value zero indicates delete immediately. If this value is nil, the default grace period for the
53+
// specified type will be used.
54+
// Defaults to a per object value if not specified. zero means delete immediately.
55+
GracePeriodSeconds *int64
5056
}
5157

5258
type DeschedulerProfile struct {

pkg/api/v1alpha2/zz_generated.conversion.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/api/v1alpha2/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/api/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/descheduler/descheduler.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu
157157
WithMaxPodsToEvictPerNamespace(deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace).
158158
WithMaxPodsToEvictTotal(deschedulerPolicy.MaxNoOfPodsToEvictTotal).
159159
WithEvictionFailureEventNotification(deschedulerPolicy.EvictionFailureEventNotification).
160+
WithGracePeriodSeconds(deschedulerPolicy.GracePeriodSeconds).
160161
WithDryRun(rs.DryRun).
161162
WithMetricsEnabled(!rs.DisableMetrics),
162163
)

pkg/descheduler/evictions/evictions.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ type PodEvictor struct {
214214
maxPodsToEvictPerNode *uint
215215
maxPodsToEvictPerNamespace *uint
216216
maxPodsToEvictTotal *uint
217+
gracePeriodSeconds *int64
217218
nodePodCount nodePodEvictedCount
218219
namespacePodCount namespacePodEvictCount
219220
totalPodCount uint
@@ -247,6 +248,7 @@ func NewPodEvictor(
247248
maxPodsToEvictPerNode: options.maxPodsToEvictPerNode,
248249
maxPodsToEvictPerNamespace: options.maxPodsToEvictPerNamespace,
249250
maxPodsToEvictTotal: options.maxPodsToEvictTotal,
251+
gracePeriodSeconds: options.gracePeriodSeconds,
250252
metricsEnabled: options.metricsEnabled,
251253
nodePodCount: make(nodePodEvictedCount),
252254
namespacePodCount: make(namespacePodEvictCount),
@@ -563,7 +565,9 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
563565

564566
// return (ignore, err)
565567
func (pe *PodEvictor) evictPod(ctx context.Context, pod *v1.Pod) (bool, error) {
566-
deleteOptions := &metav1.DeleteOptions{}
568+
deleteOptions := &metav1.DeleteOptions{
569+
GracePeriodSeconds: pe.gracePeriodSeconds,
570+
}
567571
// GracePeriodSeconds ?
568572
eviction := &policy.Eviction{
569573
TypeMeta: metav1.TypeMeta{

pkg/descheduler/evictions/options.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ type Options struct {
1212
maxPodsToEvictTotal *uint
1313
evictionFailureEventNotification bool
1414
metricsEnabled bool
15+
gracePeriodSeconds *int64
1516
}
1617

1718
// NewOptions returns an Options with default values.
@@ -46,6 +47,11 @@ func (o *Options) WithMaxPodsToEvictTotal(maxPodsToEvictTotal *uint) *Options {
4647
return o
4748
}
4849

50+
func (o *Options) WithGracePeriodSeconds(gracePeriodSeconds *int64) *Options {
51+
o.gracePeriodSeconds = gracePeriodSeconds
52+
return o
53+
}
54+
4955
func (o *Options) WithMetricsEnabled(metricsEnabled bool) *Options {
5056
o.metricsEnabled = metricsEnabled
5157
return o

test/e2e/e2e_toomanyrestarts_test.go

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ import (
4343

4444
const deploymentReplicas = 4
4545

46-
func tooManyRestartsPolicy(targetNamespace string, podRestartThresholds int32, includingInitContainers bool) *apiv1alpha2.DeschedulerPolicy {
46+
func tooManyRestartsPolicy(targetNamespace string, podRestartThresholds int32, includingInitContainers bool, gracePeriodSeconds int64) *apiv1alpha2.DeschedulerPolicy {
4747
return &apiv1alpha2.DeschedulerPolicy{
4848
Profiles: []apiv1alpha2.DeschedulerProfile{
4949
{
@@ -84,6 +84,7 @@ func tooManyRestartsPolicy(targetNamespace string, podRestartThresholds int32, i
8484
},
8585
},
8686
},
87+
GracePeriodSeconds: &gracePeriodSeconds,
8788
}
8889
}
8990

@@ -127,16 +128,23 @@ func TestTooManyRestarts(t *testing.T) {
127128
tests := []struct {
128129
name string
129130
policy *apiv1alpha2.DeschedulerPolicy
131+
enableGracePeriod bool
130132
expectedEvictedPodCount uint
131133
}{
132134
{
133135
name: "test-no-evictions",
134-
policy: tooManyRestartsPolicy(testNamespace.Name, 10000, true),
136+
policy: tooManyRestartsPolicy(testNamespace.Name, 10000, true, 0),
135137
expectedEvictedPodCount: 0,
136138
},
137139
{
138140
name: "test-one-evictions",
139-
policy: tooManyRestartsPolicy(testNamespace.Name, 3, true),
141+
policy: tooManyRestartsPolicy(testNamespace.Name, 3, true, 0),
142+
expectedEvictedPodCount: 4,
143+
},
144+
{
145+
name: "test-one-evictions-use-gracePeriodSeconds",
146+
policy: tooManyRestartsPolicy(testNamespace.Name, 3, true, 60),
147+
enableGracePeriod: true,
140148
expectedEvictedPodCount: 4,
141149
},
142150
}
@@ -197,8 +205,32 @@ func TestTooManyRestarts(t *testing.T) {
197205
deschedulerPodName = deschedulerPods[0].Name
198206
}
199207

208+
// Check if grace period is enabled and wait accordingly
209+
if tc.enableGracePeriod {
210+
// Ensure no pods are evicted during the grace period
211+
// Wait for 55 seconds to ensure that the pods are not evicted during the grace period
212+
// We do not want to use an extreme waiting time, such as 59 seconds,
213+
// because the grace period is set to 60 seconds.
214+
// In order to avoid unnecessary flake failures,
215+
// we only need to make sure that the pod is not evicted within a certain range.
216+
t.Logf("Waiting for grace period of %d seconds", 55)
217+
if err := wait.PollUntilContextTimeout(ctx, 1*time.Second, time.Duration(55)*time.Second, true, func(ctx context.Context) (bool, error) {
218+
currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
219+
actualEvictedPod := preRunNames.Difference(currentRunNames)
220+
actualEvictedPodCount := uint(actualEvictedPod.Len())
221+
t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount)
222+
if actualEvictedPodCount > 0 {
223+
t.Fatalf("Pods were evicted during grace period; expected 0, got %v", actualEvictedPodCount)
224+
return false, nil
225+
}
226+
return true, nil
227+
}); err != nil {
228+
t.Fatalf("Error waiting during grace period: %v", err)
229+
}
230+
}
231+
200232
// Run RemovePodsHavingTooManyRestarts strategy
201-
if err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 20*time.Second, true, func(ctx context.Context) (bool, error) {
233+
if err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 30*time.Second, true, func(ctx context.Context) (bool, error) {
202234
currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
203235
actualEvictedPod := preRunNames.Difference(currentRunNames)
204236
actualEvictedPodCount := uint(actualEvictedPod.Len())
@@ -210,7 +242,7 @@ func TestTooManyRestarts(t *testing.T) {
210242

211243
return true, nil
212244
}); err != nil {
213-
t.Errorf("Error waiting for descheduler running: %v", err)
245+
t.Fatalf("Error waiting for descheduler running: %v", err)
214246
}
215247
waitForTerminatingPodsToDisappear(ctx, t, clientSet, testNamespace.Name)
216248
})

0 commit comments

Comments
 (0)