
Refactor - Modify the common functions to be used by the high utilization strategy

Author: Hanu
Date: 2021-06-01 21:31:11 +08:00
parent 6e71068f4f
commit 2f18864fa5

3 changed files with 50 additions and 42 deletions

View File

@@ -20,6 +20,7 @@ import (
     "context"
     "fmt"

     v1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/resource"
     clientset "k8s.io/client-go/kubernetes"
     "k8s.io/klog/v2"
@@ -33,7 +34,7 @@ import (
 // to calculate nodes' utilization and not the actual resource usage.
 func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
     // TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
-    if err := validateLowNodeUtilizationParams(strategy.Params); err != nil {
+    if err := validateNodeUtilizationParams(strategy.Params); err != nil {
         klog.ErrorS(err, "Invalid LowNodeUtilization parameters")
         return
     }
@@ -50,7 +51,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
     thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
     targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
-    if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
+    if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
         klog.ErrorS(err, "LowNodeUtilization config is not valid")
         return
     }
@@ -69,7 +70,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
     }
     resourceNames := getResourceNames(thresholds)

-    lowNodes, targetNodes := classifyNodes(
+    lowNodes, sourceNodes := classifyNodes(
         getNodeUsage(ctx, client, nodes, thresholds, targetThresholds, resourceNames),
         // The node has to be schedulable (to be able to move workload there)
         func(node *v1.Node, usage NodeUsage) bool {
@@ -110,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
         }
     }
     klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...)
-    klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(targetNodes))
+    klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))

     if len(lowNodes) == 0 {
         klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
@@ -127,26 +128,42 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
         return
     }

-    if len(targetNodes) == 0 {
+    if len(sourceNodes) == 0 {
         klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
         return
     }

     evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))

-    evictPodsFromTargetNodes(
+    // stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
+    continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+        if !isNodeAboveTargetUtilization(nodeUsage) {
+            return false
+        }
+        for name := range totalAvailableUsage {
+            if totalAvailableUsage[name].CmpInt64(0) < 1 {
+                return false
+            }
+        }
+
+        return true
+    }
+
+    evictPodsFromSourceNodes(
         ctx,
-        targetNodes,
+        sourceNodes,
         lowNodes,
         podEvictor,
         evictable.IsEvictable,
-        resourceNames)
+        resourceNames,
+        "LowNodeUtilization",
+        continueEvictionCond)

     klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted())
 }

-// validateStrategyConfig checks if the strategy's config is valid
-func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
+// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
+func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
     // validate thresholds and targetThresholds config
     if err := validateThresholds(thresholds); err != nil {
         return fmt.Errorf("thresholds config is not valid: %v", err)
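
The closure extracted above is the point of the refactor: the shared eviction path now takes the stop condition and the strategy name as parameters, so a second strategy can reuse it. A minimal sketch of such a caller follows; only evictPodsFromSourceNodes, continueEvictionCond, NodeUsage and the parameter order come from this diff, while the HighNodeUtilization name, its node classification and its stop condition are assumptions (the commit message only hints at them):

    // Hypothetical second strategy reusing the refactored helper (illustrative only).
    func HighNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
        // ... build sourceNodes (nodes to drain), destinationNodes, resourceNames
        // and the evictable filter, analogously to LowNodeUtilization above ...

        // keep evicting only while every tracked resource still has headroom to move
        continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
            for name := range totalAvailableUsage {
                if totalAvailableUsage[name].CmpInt64(0) < 1 {
                    return false
                }
            }
            return true
        }

        evictPodsFromSourceNodes(
            ctx,
            sourceNodes,
            destinationNodes,
            podEvictor,
            evictable.IsEvictable,
            resourceNames,
            "HighNodeUtilization", // hypothetical strategy name, passed through to EvictPod
            continueEvictionCond)
    }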

View File

@@ -818,7 +818,7 @@ func TestLowNodeUtilization(t *testing.T) {
     }
 }

-func TestValidateStrategyConfig(t *testing.T) {
+func TestValidateLowNodeUtilizationStrategyConfig(t *testing.T) {
     tests := []struct {
         name             string
         thresholds       api.ResourceThresholds
@@ -958,7 +958,7 @@ func TestValidateStrategyConfig(t *testing.T) {
     }

     for _, testCase := range tests {
-        validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
+        validateErr := validateLowUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)

         if validateErr == nil || testCase.errInfo == nil {
             if validateErr != testCase.errInfo {
@@ -972,9 +972,7 @@ func TestValidateStrategyConfig(t *testing.T) {
     }
 }

-func TestWithTaints(t *testing.T) {
+func TestLowNodeUtilizationWithTaints(t *testing.T) {
     ctx := context.Background()
     strategy := api.DeschedulerStrategy{
         Enabled: true,
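
For orientation, a short sketch of how the renamed validator is driven, assuming api.ResourceThresholds is the per-resource percentage map used elsewhere in the strategy (the threshold values below are illustrative):

    thresholds := api.ResourceThresholds{
        v1.ResourceCPU:  30, // nodes below 30% CPU count as underutilized
        v1.ResourcePods: 30,
    }
    targetThresholds := api.ResourceThresholds{
        v1.ResourceCPU:  50, // nodes above 50% CPU are eviction sources
        v1.ResourcePods: 50,
    }
    if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
        klog.ErrorS(err, "LowNodeUtilization config is not valid")
    }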

View File

@@ -40,6 +40,8 @@ type NodeUsage struct {
     highResourceThreshold map[v1.ResourceName]*resource.Quantity
 }

+type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
+
 // NodePodsMap is a set of (node, pods) pairs
 type NodePodsMap map[*v1.Node][]*v1.Pod

@@ -50,7 +52,7 @@ const (
     MaxResourcePercentage = 100
 )

-func validateLowNodeUtilizationParams(params *api.StrategyParameters) error {
+func validateNodeUtilizationParams(params *api.StrategyParameters) error {
     if params == nil || params.NodeResourceUtilizationThresholds == nil {
         return fmt.Errorf("NodeResourceUtilizationThresholds not set")
     }
@@ -172,18 +174,20 @@ func classifyNodes(
     return lowNodes, highNodes
 }

-// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
+// evictPodsFromSourceNodes evicts pods based on priority, if all the pods on the node have priority, if not
 // evicts them based on QoS as fallback option.
 // TODO: @ravig Break this function into smaller functions.
-func evictPodsFromTargetNodes(
+func evictPodsFromSourceNodes(
     ctx context.Context,
-    targetNodes, lowNodes []NodeUsage,
+    sourceNodes, destinationNodes []NodeUsage,
     podEvictor *evictions.PodEvictor,
     podFilter func(pod *v1.Pod) bool,
     resourceNames []v1.ResourceName,
+    strategy string,
+    continueEviction continueEvictionCond,
 ) {
-    sortNodesByUsage(targetNodes)
+    sortNodesByUsage(sourceNodes)

     // upper bound on total number of pods/cpu/memory and optional extended resources to be moved
     totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{
@@ -192,9 +196,9 @@ func evictPodsFromTargetNodes(
         v1.ResourceMemory: {},
     }

-    var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
-    for _, node := range lowNodes {
-        taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
+    var taintsOfDestinationNodes = make(map[string][]v1.Taint, len(destinationNodes))
+    for _, node := range destinationNodes {
+        taintsOfDestinationNodes[node.node.Name] = node.node.Spec.Taints

         for _, name := range resourceNames {
             if _, ok := totalAvailableUsage[name]; !ok {
@@ -218,7 +222,7 @@ func evictPodsFromTargetNodes(
     }
     klog.V(1).InfoS("Total capacity to be moved", keysAndValues...)

-    for _, node := range targetNodes {
+    for _, node := range sourceNodes {
         klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)

         nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
@@ -232,7 +236,7 @@ func evictPodsFromTargetNodes(
         klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
         // sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
         podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
-        evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfLowNodes, podEvictor)
+        evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction)

         klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", podEvictor.NodeEvicted(node.node), "usage", node.usage)
     }
@@ -244,29 +248,18 @@ func evictPods(
     totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
     taintsOfLowNodes map[string][]v1.Taint,
     podEvictor *evictions.PodEvictor,
+    strategy string,
+    continueEviction continueEvictionCond,
 ) {
-    // stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
-    continueCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
-        if !isNodeAboveTargetUtilization(nodeUsage) {
-            return false
-        }
-        for name := range totalAvailableUsage {
-            if totalAvailableUsage[name].CmpInt64(0) < 1 {
-                return false
-            }
-        }
-        return true
-    }
-
-    if continueCond(nodeUsage, totalAvailableUsage) {
+    if continueEviction(nodeUsage, totalAvailableUsage) {
         for _, pod := range inputPods {
             if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
                 klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
                 continue
             }

-            success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, "LowNodeUtilization")
+            success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
             if err != nil {
                 klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
                 break
@@ -299,8 +292,8 @@ func evictPods(
         }
         klog.V(3).InfoS("Updated node usage", keysAndValues...)

-        // check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
-        if !continueCond(nodeUsage, totalAvailableUsage) {
+        // check if pods can be still evicted
+        if !continueEviction(nodeUsage, totalAvailableUsage) {
            break
         }
     }
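
The stop condition threaded through evictPods can also be read in isolation: eviction continues only while the node stays above its target utilization and every tracked resource still has headroom to move. A self-contained sketch of the headroom half of that check, using only k8s.io/api and k8s.io/apimachinery (the quantities below are made up for illustration):

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
        "k8s.io/apimachinery/pkg/api/resource"
    )

    func main() {
        // Remaining capacity on the destination nodes. Once any tracked
        // resource drops to zero, CmpInt64(0) < 1 holds and eviction stops.
        totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{
            v1.ResourceCPU:    resource.NewMilliQuantity(500, resource.DecimalSI),
            v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
        }

        continueEviction := true
        for name := range totalAvailableUsage {
            if totalAvailableUsage[name].CmpInt64(0) < 1 {
                continueEviction = false
            }
        }

        fmt.Println("continue evicting:", continueEviction) // false: memory headroom is exhausted
    }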