Mirror of https://github.com/kubernetes-sigs/descheduler.git, synced 2026-01-28 22:57:35 +01:00
PodEvictor: turn an exceeded limit into an error
When a node limit is exceeded, the pod eviction currently never fails. As a result, the metric reporting a pod that could not be evicted due to node limit constraints is skipped. Returning an error also allows plugins to react to other limits being exceeded, e.g. the limit on the number of pods evicted per namespace.
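All the plugin-side hunks below follow the same pattern: call Evict, treat a nil error as a successful eviction, stop iterating over the node's pods when the error is a *evictions.EvictionNodeLimitError, and only log any other failure. The following is a minimal, self-contained sketch of that control flow; the Evictor interface, the error type, and the evictPods helper here are simplified stand-ins for illustration, not the descheduler's actual API.

// Minimal standalone sketch of the error-handling pattern this commit
// introduces in each plugin. The types below are stand-ins for the
// descheduler's framework/evictions types; only the control flow matters.
package main

import (
	"context"
	"fmt"
)

// EvictionNodeLimitError stands in for evictions.EvictionNodeLimitError:
// it signals that the per-node eviction limit has been reached.
type EvictionNodeLimitError struct{ NodeName string }

func (e *EvictionNodeLimitError) Error() string {
	return fmt.Sprintf("maximum number of evicted pods per node %q reached", e.NodeName)
}

// Evictor is a cut-down stand-in for the plugin handle's evictor:
// Evict now returns an error instead of a bool.
type Evictor interface {
	Evict(ctx context.Context, pod string) error
}

// evictPods mirrors the loop each plugin now runs: a nil error means the
// pod was evicted, a node-limit error stops evicting on this node, and
// any other error is only logged.
func evictPods(ctx context.Context, evictor Evictor, pods []string) {
loop:
	for _, pod := range pods {
		err := evictor.Evict(ctx, pod)
		if err == nil {
			continue
		}
		switch err.(type) {
		case *EvictionNodeLimitError:
			break loop // the node's limit is exhausted, no point trying further pods
		default:
			fmt.Printf("eviction failed: %v\n", err) // klog.Errorf in the real plugins
		}
	}
}

// fakeEvictor evicts the first two pods and then reports the node limit.
type fakeEvictor struct{ evicted int }

func (f *fakeEvictor) Evict(ctx context.Context, pod string) error {
	if f.evicted >= 2 {
		return &EvictionNodeLimitError{NodeName: "node-1"}
	}
	f.evicted++
	return nil
}

func main() {
	evictPods(context.Background(), &fakeEvictor{}, []string{"p1", "p2", "p3", "p4"})
}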
@@ -311,42 +311,48 @@ func evictPods(
continue
}

if preEvictionFilterWithOptions(pod) {
if podEvictor.Evict(ctx, pod, evictOptions) {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
if !preEvictionFilterWithOptions(pod) {
continue
}
err = podEvictor.Evict(ctx, pod, evictOptions)
if err == nil {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))

for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeInfo.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}

keysAndValues := []interface{}{
"node", nodeInfo.node.Name,
"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}

klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeInfo, totalAvailableUsage) {
break
for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeInfo.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}

keysAndValues := []interface{}{
"node", nodeInfo.node.Name,
"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}

klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeInfo, totalAvailableUsage) {
break
}
continue
}
if podEvictor.NodeLimitExceeded(nodeInfo.node) {
switch err.(type) {
case *evictions.EvictionNodeLimitError:
return
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

@@ -131,9 +131,17 @@ func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *framewo
// in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction
podutil.SortPodsBasedOnAge(podsToEvict)

loop:
for _, pod := range podsToEvict {
if !d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}

@@ -210,9 +210,15 @@ func (r *RemoveDuplicates) Balance(ctx context.Context, nodes []*v1.Node) *frame
// It's assumed all duplicated pods are in the same priority class
// TODO(jchaloup): check if the pod has a different node to lend to
for _, pod := range pods[upperAvg-1:] {
r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if r.handle.Evictor().NodeLimitExceeded(nodeMap[nodeName]) {
err := r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

@@ -102,10 +102,17 @@ func (d *RemoveFailedPods) Deschedule(ctx context.Context, nodes []*v1.Node) *fr
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

@@ -122,10 +122,17 @@ func (d *RemovePodsHavingTooManyRestarts) Deschedule(ctx context.Context, nodes
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

@@ -98,7 +98,8 @@ loop:
for i := 0; i < totalPods; i++ {
if utils.CheckPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) {
if d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
// Since the current pod is evicted all other pods which have anti-affinity with this
// pod need not be evicted.
// Update allPods.
@@ -106,12 +107,16 @@ loop:
pods = append(pods[:i], pods[i+1:]...)
i--
totalPods--
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}
if d.handle.Evictor().NodeLimitExceeded(node) {
continue loop
}
}
}
return nil

@@ -134,11 +134,18 @@ func (d *RemovePodsViolatingNodeAffinity) processNodes(ctx context.Context, node
}
}

loop:
for _, pod := range pods {
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

@@ -114,6 +114,7 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
if !utils.TolerationsTolerateTaintsWithFilter(
pods[i].Spec.Tolerations,
@@ -121,9 +122,15 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
d.taintFilterFnc,
) {
klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

@@ -235,10 +235,16 @@ func (d *RemovePodsViolatingTopologySpreadConstraint) Balance(ctx context.Contex
}

if d.handle.Evictor().PreEvictionFilter(pod) {
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
}
if d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
nodeLimitExceeded[pod.Spec.NodeName] = true
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
nodeLimitExceeded[pod.Spec.NodeName] = true
default:
klog.Errorf("eviction failed: %v", err)
}
}
}