Skip to content

Commit a5b9323

Browse files
committed
adding feature to uncordon node if full drain is unsuccessful
1 parent db64c81 commit a5b9323

File tree

5 files changed

+34
-0
lines changed

5 files changed

+34
-0
lines changed

pkg/descheduler/node/node.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"k8s.io/apimachinery/pkg/api/resource"
2727
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2828
"k8s.io/apimachinery/pkg/labels"
29+
"k8s.io/apimachinery/pkg/types"
2930
clientset "k8s.io/client-go/kubernetes"
3031
listersv1 "k8s.io/client-go/listers/core/v1"
3132
"k8s.io/client-go/util/workqueue"
@@ -400,3 +401,26 @@ func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFun
400401

401402
return false, nil
402403
}
404+
405+
// UncordonNode removes the Unschedulable flag from a node to allow new pods to be scheduled on it.
406+
// This is useful when eviction has failed and we want to allow the node to receive new workloads again.
407+
func UncordonNode(ctx context.Context, client clientset.Interface, node *v1.Node) error {
408+
if !node.Spec.Unschedulable {
409+
// Node is already uncordoned, nothing to do
410+
return nil
411+
}
412+
413+
logger := klog.FromContext(ctx)
414+
logger.V(2).InfoS("Uncordoning node", "node", klog.KObj(node))
415+
416+
// Create a JSON patch to set Unschedulable to false
417+
patch := []byte(`[{"op": "replace", "path": "/spec/unschedulable", "value": false}]`)
418+
_, err := client.CoreV1().Nodes().Patch(ctx, node.Name, types.JSONPatchType, patch, metav1.PatchOptions{})
419+
if err != nil {
420+
logger.Error(err, "Failed to uncordon node", "node", klog.KObj(node))
421+
return fmt.Errorf("failed to uncordon node %s: %w", node.Name, err)
422+
}
423+
424+
logger.V(1).InfoS("Successfully uncordoned node", "node", klog.KObj(node))
425+
return nil
426+
}

pkg/framework/plugins/nodeutilization/highnodeutilization.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
275275
continueEvictionCond,
276276
h.usageClient,
277277
nil,
278+
h.handle,
278279
)
279280

280281
return nil

pkg/framework/plugins/nodeutilization/lownodeutilization.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
316316
continueEvictionCond,
317317
l.usageClient,
318318
nodeLimit,
319+
l.handle,
319320
)
320321

321322
return nil

pkg/framework/plugins/nodeutilization/nodeutilization.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ func evictPodsFromSourceNodes(
175175
continueEviction continueEvictionCond,
176176
usageClient usageClient,
177177
maxNoOfPodsToEvictPerNode *uint,
178+
handle frameworktypes.Handle,
178179
) {
179180
logger := klog.FromContext(ctx)
180181
available, err := assessAvailableResourceInNodes(destinationNodes, resourceNames)
@@ -240,6 +241,13 @@ func evictPodsFromSourceNodes(
240241
case *evictions.EvictionTotalLimitError:
241242
return
242243
default:
244+
// Eviction failed, uncordon the node to allow new pods to be scheduled
245+
if node.node.Spec.Unschedulable {
246+
logger.V(1).Info("Eviction failed, uncordoning node", "node", klog.KObj(node.node))
247+
if uncordonErr := nodeutil.UncordonNode(ctx, handle.ClientSet(), node.node); uncordonErr != nil {
248+
logger.Error(uncordonErr, "Failed to uncordon node after eviction failure", "node", klog.KObj(node.node))
249+
}
250+
}
243251
}
244252
}
245253
}

vendor/.DS_Store

6 KB
Binary file not shown.

0 commit comments

Comments
 (0)