Mirror of https://github.com/kubernetes-sigs/descheduler.git
Refactor: modify the common functions so they can be reused by a high-utilization strategy

The LowNodeUtilization-specific helpers are renamed to strategy-neutral names (evictPodsFromTargetNodes becomes evictPodsFromSourceNodes, validateLowNodeUtilizationParams becomes validateNodeUtilizationParams), and the eviction stop condition is extracted into a continueEvictionCond callback that each strategy passes in along with its name.
@@ -20,6 +20,7 @@ import (
     "context"
     "fmt"
     v1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/resource"
     clientset "k8s.io/client-go/kubernetes"
     "k8s.io/klog/v2"
 
@@ -33,7 +34,7 @@ import (
 // to calculate nodes' utilization and not the actual resource usage.
 func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
     // TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
-    if err := validateLowNodeUtilizationParams(strategy.Params); err != nil {
+    if err := validateNodeUtilizationParams(strategy.Params); err != nil {
         klog.ErrorS(err, "Invalid LowNodeUtilization parameters")
         return
     }
@@ -50,7 +51,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 
     thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
     targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
-    if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
+    if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
         klog.ErrorS(err, "LowNodeUtilization config is not valid")
         return
     }
@@ -69,7 +70,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
     }
     resourceNames := getResourceNames(thresholds)
 
-    lowNodes, targetNodes := classifyNodes(
+    lowNodes, sourceNodes := classifyNodes(
         getNodeUsage(ctx, client, nodes, thresholds, targetThresholds, resourceNames),
         // The node has to be schedulable (to be able to move workload there)
         func(node *v1.Node, usage NodeUsage) bool {
@@ -110,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
         }
     }
     klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...)
-    klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(targetNodes))
+    klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
 
     if len(lowNodes) == 0 {
         klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
@@ -127,26 +128,42 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
         return
     }
 
-    if len(targetNodes) == 0 {
+    if len(sourceNodes) == 0 {
         klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
         return
     }
 
     evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
 
-    evictPodsFromTargetNodes(
+    // stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
+    continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+        if !isNodeAboveTargetUtilization(nodeUsage) {
+            return false
+        }
+        for name := range totalAvailableUsage {
+            if totalAvailableUsage[name].CmpInt64(0) < 1 {
+                return false
+            }
+        }
+
+        return true
+    }
+
+    evictPodsFromSourceNodes(
         ctx,
-        targetNodes,
+        sourceNodes,
         lowNodes,
         podEvictor,
         evictable.IsEvictable,
-        resourceNames)
+        resourceNames,
+        "LowNodeUtilization",
+        continueEvictionCond)
 
     klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted())
 }
 
-// validateStrategyConfig checks if the strategy's config is valid
-func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
+// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
+func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
     // validate thresholds and targetThresholds config
     if err := validateThresholds(thresholds); err != nil {
         return fmt.Errorf("thresholds config is not valid: %v", err)
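For context, a minimal sketch of how another strategy could reuse the shared helper after this change: it only has to supply its own strategy name and stop condition. The sketch assumes it lives in the same package as the code above; highNodeUtilizationEvict and the "HighNodeUtilization" label are illustrative assumptions, not part of this commit.

// Hypothetical caller showing the reuse path enabled by this refactor.
func highNodeUtilizationEvict(
    ctx context.Context,
    sourceNodes, destinationNodes []NodeUsage,
    podEvictor *evictions.PodEvictor,
    podFilter func(pod *v1.Pod) bool,
    resourceNames []v1.ResourceName,
) {
    // Stop as soon as any tracked resource has no capacity left to move.
    continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
        for name := range totalAvailableUsage {
            if totalAvailableUsage[name].CmpInt64(0) < 1 {
                return false
            }
        }
        return true
    }

    evictPodsFromSourceNodes(
        ctx,
        sourceNodes,
        destinationNodes,
        podEvictor,
        podFilter,
        resourceNames,
        "HighNodeUtilization",
        continueEvictionCond)
}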
@@ -818,7 +818,7 @@ func TestLowNodeUtilization(t *testing.T) {
     }
 }
 
-func TestValidateStrategyConfig(t *testing.T) {
+func TestValidateLowNodeUtilizationStrategyConfig(t *testing.T) {
     tests := []struct {
         name       string
         thresholds api.ResourceThresholds
@@ -958,7 +958,7 @@ func TestValidateStrategyConfig(t *testing.T) {
     }
 
     for _, testCase := range tests {
-        validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
+        validateErr := validateLowUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
 
         if validateErr == nil || testCase.errInfo == nil {
             if validateErr != testCase.errInfo {
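For illustration, one entry of this table-driven test might look like the sketch below. It uses only the fields visible in the diff (name, thresholds, targetThresholds, errInfo); the concrete values are assumptions.

// Hypothetical table entry: a valid config is expected to produce a nil error.
{
    name:             "valid thresholds below matching targetThresholds",
    thresholds:       api.ResourceThresholds{v1.ResourceCPU: 20, v1.ResourceMemory: 20},
    targetThresholds: api.ResourceThresholds{v1.ResourceCPU: 80, v1.ResourceMemory: 80},
    errInfo:          nil,
},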
@@ -972,9 +972,7 @@ func TestValidateStrategyConfig(t *testing.T) {
     }
 }
 
-
-
-func TestWithTaints(t *testing.T) {
+func TestLowNodeUtilizationWithTaints(t *testing.T) {
     ctx := context.Background()
     strategy := api.DeschedulerStrategy{
         Enabled: true,
@@ -40,6 +40,8 @@ type NodeUsage struct {
     highResourceThreshold map[v1.ResourceName]*resource.Quantity
 }
 
+type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
+
 // NodePodsMap is a set of (node, pods) pairs
 type NodePodsMap map[*v1.Node][]*v1.Pod
 
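The new continueEvictionCond type is what lets each strategy plug in its own stop rule. A minimal conforming value might look like the following sketch; evictWhileAboveTarget is hypothetical, while isNodeAboveTargetUtilization is the helper already used by LowNodeUtilization above.

// Sketch: continue evicting only while the node is still above its target utilization.
var evictWhileAboveTarget continueEvictionCond = func(nodeUsage NodeUsage, _ map[v1.ResourceName]*resource.Quantity) bool {
    return isNodeAboveTargetUtilization(nodeUsage)
}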
@@ -50,7 +52,7 @@ const (
     MaxResourcePercentage = 100
 )
 
-func validateLowNodeUtilizationParams(params *api.StrategyParameters) error {
+func validateNodeUtilizationParams(params *api.StrategyParameters) error {
     if params == nil || params.NodeResourceUtilizationThresholds == nil {
         return fmt.Errorf("NodeResourceUtilizationThresholds not set")
     }
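For reference, a hedged sketch of strategy parameters that should pass validateNodeUtilizationParams: the NodeResourceUtilizationThresholds field must be non-nil. The concrete field type and threshold values here are assumptions based on the field names referenced in the diff.

// Sketch: minimal parameters that satisfy validateNodeUtilizationParams
// (assumed field types; thresholds are expressed as percentages).
params := &api.StrategyParameters{
    NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
        Thresholds:       api.ResourceThresholds{v1.ResourceCPU: 20, v1.ResourcePods: 20},
        TargetThresholds: api.ResourceThresholds{v1.ResourceCPU: 50, v1.ResourcePods: 50},
    },
}
if err := validateNodeUtilizationParams(params); err != nil {
    klog.ErrorS(err, "Invalid node utilization parameters")
}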
@@ -172,18 +174,20 @@ func classifyNodes(
     return lowNodes, highNodes
 }
 
-// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
+// evictPodsFromSourceNodes evicts pods based on priority, if all the pods on the node have priority, if not
 // evicts them based on QoS as fallback option.
 // TODO: @ravig Break this function into smaller functions.
-func evictPodsFromTargetNodes(
+func evictPodsFromSourceNodes(
     ctx context.Context,
-    targetNodes, lowNodes []NodeUsage,
+    sourceNodes, destinationNodes []NodeUsage,
     podEvictor *evictions.PodEvictor,
     podFilter func(pod *v1.Pod) bool,
     resourceNames []v1.ResourceName,
+    strategy string,
+    continueEviction continueEvictionCond,
 ) {
 
-    sortNodesByUsage(targetNodes)
+    sortNodesByUsage(sourceNodes)
 
     // upper bound on total number of pods/cpu/memory and optional extended resources to be moved
     totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{
@@ -192,9 +196,9 @@ func evictPodsFromTargetNodes(
         v1.ResourceMemory: {},
     }
 
-    var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
-    for _, node := range lowNodes {
-        taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
+    var taintsOfDestinationNodes = make(map[string][]v1.Taint, len(destinationNodes))
+    for _, node := range destinationNodes {
+        taintsOfDestinationNodes[node.node.Name] = node.node.Spec.Taints
 
         for _, name := range resourceNames {
             if _, ok := totalAvailableUsage[name]; !ok {
@@ -218,7 +222,7 @@ func evictPodsFromTargetNodes(
     }
     klog.V(1).InfoS("Total capacity to be moved", keysAndValues...)
 
-    for _, node := range targetNodes {
+    for _, node := range sourceNodes {
         klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
 
         nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
@@ -232,7 +236,7 @@ func evictPodsFromTargetNodes(
         klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
         // sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
         podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
-        evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfLowNodes, podEvictor)
+        evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction)
         klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", podEvictor.NodeEvicted(node.node), "usage", node.usage)
     }
 }
@@ -244,29 +248,18 @@ func evictPods(
     totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
     taintsOfLowNodes map[string][]v1.Taint,
     podEvictor *evictions.PodEvictor,
+    strategy string,
+    continueEviction continueEvictionCond,
 ) {
 
-    // stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
-    continueCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
-        if !isNodeAboveTargetUtilization(nodeUsage) {
-            return false
-        }
-        for name := range totalAvailableUsage {
-            if totalAvailableUsage[name].CmpInt64(0) < 1 {
-                return false
-            }
-        }
-
-        return true
-    }
-    if continueCond(nodeUsage, totalAvailableUsage) {
+    if continueEviction(nodeUsage, totalAvailableUsage) {
         for _, pod := range inputPods {
             if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
                 klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
                 continue
             }
 
-            success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, "LowNodeUtilization")
+            success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
             if err != nil {
                 klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
                 break
@@ -299,8 +292,8 @@ func evictPods(
                 }
 
                 klog.V(3).InfoS("Updated node usage", keysAndValues...)
-                // check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
-                if !continueCond(nodeUsage, totalAvailableUsage) {
+                // check if pods can be still evicted
+                if !continueEviction(nodeUsage, totalAvailableUsage) {
                     break
                 }
             }