Skip to content

Commit 86fe687

Browse files
committed
Skip draining failed DaemonSet pods to prevent recreation loops
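In scenarios where nodes experience DiskPressure, DaemonSet pods may be evicted and enter the Failed phase. When deleting Machines, cluster-api attempts to delete these failed pods, but each deletion causes the DaemonSet controller to create a replacement pod, which can be evicted again because the DiskPressure persists, creating an infinite loop. To break the loop, daemonSetFilter no longer treats finished (Succeeded or Failed) DaemonSet pods as deletable; they are now skipped during drain like any other DaemonSet pod.

Signed-off-by: liuxu <[email protected]>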
1 parent 31ea0af commit 86fe687

File tree

2 files changed

+20
-23
lines changed

2 files changed

+20
-23
lines changed

internal/controllers/machine/drain/drain_test.go

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ func TestGetPodsForEviction(t *testing.T) {
268268
},
269269
{
270270
ObjectMeta: metav1.ObjectMeta{
271-
Name: "pod-2-delete-succeeded-daemonset-pod",
271+
Name: "pod-2-skip-succeeded-daemonset-pod",
272272
Namespace: metav1.NamespaceDefault,
273273
OwnerReferences: []metav1.OwnerReference{
274274
{
@@ -283,7 +283,22 @@ func TestGetPodsForEviction(t *testing.T) {
283283
},
284284
{
285285
ObjectMeta: metav1.ObjectMeta{
286-
Name: "pod-3-delete-orphaned-daemonset-pod",
286+
Name: "pod-3-skip-failed-daemonset-pod",
287+
Namespace: metav1.NamespaceDefault,
288+
OwnerReferences: []metav1.OwnerReference{
289+
{
290+
Kind: "DaemonSet",
291+
Controller: ptr.To(true),
292+
},
293+
},
294+
},
295+
Status: corev1.PodStatus{
296+
Phase: corev1.PodFailed,
297+
},
298+
},
299+
{
300+
ObjectMeta: metav1.ObjectMeta{
301+
Name: "pod-4-delete-orphaned-daemonset-pod",
287302
Namespace: metav1.NamespaceDefault,
288303
OwnerReferences: []metav1.OwnerReference{
289304
{
@@ -299,7 +314,7 @@ func TestGetPodsForEviction(t *testing.T) {
299314
},
300315
{
301316
ObjectMeta: metav1.ObjectMeta{
302-
Name: "pod-4-skip-daemonset-pod",
317+
Name: "pod-5-skip-daemonset-pod",
303318
Namespace: metav1.NamespaceDefault,
304319
OwnerReferences: []metav1.OwnerReference{
305320
{
@@ -332,21 +347,7 @@ func TestGetPodsForEviction(t *testing.T) {
332347
{
333348
Pod: &corev1.Pod{
334349
ObjectMeta: metav1.ObjectMeta{
335-
Name: "pod-2-delete-succeeded-daemonset-pod",
336-
Namespace: metav1.NamespaceDefault,
337-
},
338-
},
339-
// Delete this DaemonSet Pod because it is succeeded.
340-
Status: PodDeleteStatus{
341-
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorDrain,
342-
DrainOrder: ptr.To[int32](0),
343-
Reason: PodDeleteStatusTypeOkay,
344-
},
345-
},
346-
{
347-
Pod: &corev1.Pod{
348-
ObjectMeta: metav1.ObjectMeta{
349-
Name: "pod-3-delete-orphaned-daemonset-pod",
350+
Name: "pod-4-delete-orphaned-daemonset-pod",
350351
Namespace: metav1.NamespaceDefault,
351352
},
352353
},
@@ -361,7 +362,7 @@ func TestGetPodsForEviction(t *testing.T) {
361362
{
362363
Pod: &corev1.Pod{
363364
ObjectMeta: metav1.ObjectMeta{
364-
Name: "pod-4-skip-daemonset-pod",
365+
Name: "pod-5-skip-daemonset-pod",
365366
Namespace: metav1.NamespaceDefault,
366367
},
367368
},

internal/controllers/machine/drain/filters.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,6 @@ func (d *Helper) daemonSetFilter(ctx context.Context, pod *corev1.Pod) PodDelete
212212
if controllerRef == nil || controllerRef.Kind != appsv1.SchemeGroupVersion.WithKind("DaemonSet").Kind {
213213
return MakePodDeleteStatusOkay()
214214
}
215-
// Any finished pod can be removed.
216-
if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
217-
return MakePodDeleteStatusOkay()
218-
}
219215

220216
if err := d.RemoteClient.Get(ctx, client.ObjectKey{Namespace: pod.Namespace, Name: controllerRef.Name}, &appsv1.DaemonSet{}); err != nil {
221217
// remove orphaned pods with a warning

0 commit comments

Comments
 (0)