Addressing review comments
Both the LowNodeUtilization and HighNodeUtilization strategies already evict only as many pods as the other nodes have free resources to absorb, so the resource fit test is always true by definition.
@@ -737,9 +737,9 @@ The following strategies accept a `nodeFit` boolean parameter which can optimize
 
 If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`:
 - A `nodeSelector` on the pod
-- Any `Tolerations` on the pod and any `Taints` on the other nodes
+- Any `tolerations` on the pod and any `taints` on the other nodes
 - `nodeAffinity` on the pod
-- Resource `Requests` made by the pod and the resources available on other nodes
+- Resource `requests` made by the pod and the resources available on other nodes
 - Whether any of the other nodes are marked as `unschedulable`
 
 E.g.
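For orientation, here is a minimal, hypothetical sketch (not code from this commit) of how a strategy can gate evictions on the criteria above via the helper this PR touches, PodFitsAnyOtherNode. The evictWithNodeFit wrapper and its evictFn callback are invented for illustration only.

package example

import (
	v1 "k8s.io/api/core/v1"

	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// evictWithNodeFit skips any candidate pod that could not be rescheduled elsewhere.
func evictWithNodeFit(
	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
	candidates []*v1.Pod,
	nodes []*v1.Node,
	evictFn func(*v1.Pod) bool,
) {
	for _, pod := range candidates {
		// Same predicate subset the README lists: nodeSelector, tolerations/taints,
		// nodeAffinity, resource requests, and the unschedulable flag on other nodes.
		if !nodeutil.PodFitsAnyOtherNode(getPodsAssignedToNode, pod, nodes) {
			continue // nowhere else to put it, so leave it alone
		}
		evictFn(pod)
	}
}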
@@ -99,28 +99,27 @@ func IsReady(node *v1.Node) bool {
 	return true
 }
 
-// NodeFit returns true if the provided pod can probably be scheduled onto the provided node.
+// NodeFit returns true if the provided pod can be scheduled onto the provided node.
 // This function is used when the NodeFit pod filtering feature of the Descheduler is enabled.
 // This function currently considers a subset of the Kubernetes Scheduler's predicates when
 // deciding if a pod would fit on a node, but more predicates may be added in the future.
 func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error {
 	// Check node selector and required affinity
 	var errors []error
-	ok, err := utils.PodMatchNodeSelector(pod, node)
-	if err != nil {
+	if ok, err := utils.PodMatchNodeSelector(pod, node); err != nil {
 		errors = append(errors, err)
 	} else if !ok {
 		errors = append(errors, fmt.Errorf("pod node selector does not match the node label"))
 	}
 	// Check taints (we only care about NoSchedule and NoExecute taints)
-	ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
+	ok := utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
 		return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
 	})
 	if !ok {
 		errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node"))
 	}
 	// Check if the pod can fit on a node based off it's requests
-	ok, reqErrors := FitsRequest(nodeIndexer, pod, node)
+	ok, reqErrors := fitsRequest(nodeIndexer, pod, node)
 	if !ok {
 		errors = append(errors, reqErrors...)
 	}
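As a usage note, NodeFit returns one error per failed predicate rather than a single boolean, so a caller can surface every reason a pod does not fit. A hypothetical caller (not part of this commit), reusing the repository's helper packages:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"

	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// fitsWithReasons reports whether the pod fits the node and logs each failed predicate.
func fitsWithReasons(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
	errs := nodeutil.NodeFit(nodeIndexer, pod, node)
	for _, err := range errs {
		// One log line per predicate that failed (selector, taints, requests, ...).
		klog.V(2).InfoS("pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node), "reason", err)
	}
	return len(errs) == 0
}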
@@ -132,7 +131,7 @@ func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v
 	return errors
 }
 
-// PodFitsAnyOtherNode checks if the given pod will probably fit any of the given nodes, besides the node
+// PodFitsAnyOtherNode checks if the given pod will fit any of the given nodes, besides the node
 // the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
 func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
 	for _, node := range nodes {
@@ -155,7 +154,7 @@ func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.
 	return false
 }
 
-// PodFitsAnyNode checks if the given pod will probably fit any of the given nodes. The predicates used
+// PodFitsAnyNode checks if the given pod will fit any of the given nodes. The predicates used
 // to determine if the pod will fit can be found in the NodeFit function.
 func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
 	for _, node := range nodes {
@@ -173,7 +172,7 @@ func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod,
 	return false
 }
 
-// PodFitsCurrentNode checks if the given pod will probably fit onto the given node. The predicates used
+// PodFitsCurrentNode checks if the given pod will fit onto the given node. The predicates used
 // to determine if the pod will fit can be found in the NodeFit function.
 func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
 	errors := NodeFit(nodeIndexer, pod, node)
@@ -195,9 +194,9 @@ func IsNodeUnschedulable(node *v1.Node) bool {
 	return node.Spec.Unschedulable
 }
 
-// FitsRequest determines if a pod can fit on a node based on that pod's requests. It returns true if
+// fitsRequest determines if a pod can fit on a node based on its resource requests. It returns true if
 // the pod will fit.
-func FitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
+func fitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
 	var insufficientResources []error
 
 	// Get pod requests
@@ -207,7 +206,7 @@ func FitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nod
 		resourceNames = append(resourceNames, name)
 	}
 
-	availableResources, err := NodeAvailableResources(nodeIndexer, node, resourceNames)
+	availableResources, err := nodeAvailableResources(nodeIndexer, node, resourceNames)
 	if err != nil {
 		return false, []error{err}
 	}
@@ -215,15 +214,8 @@ func FitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nod
 	podFitsOnNode := true
 	for _, resource := range resourceNames {
 		podResourceRequest := podRequests[resource]
-		var requestTooLarge bool
-		switch resource {
-		case v1.ResourceCPU:
-			requestTooLarge = podResourceRequest.MilliValue() > availableResources[resource].MilliValue()
-		default:
-			requestTooLarge = podResourceRequest.Value() > availableResources[resource].Value()
-		}
-
-		if requestTooLarge {
+		availableResource, ok := availableResources[resource]
+		if !ok || podResourceRequest.MilliValue() > availableResource.MilliValue() {
 			insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource))
 			podFitsOnNode = false
 		}
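The rewritten check above compares requests and availability through MilliValue() for every resource instead of special-casing CPU. A small standalone Go example (not repository code) of why that is safe: milli-units preserve ordering for CPU and memory alike, as long as the quantities fit in an int64.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// CPU: 500m requested vs 250m available.
	cpuReq, cpuAvail := resource.MustParse("500m"), resource.MustParse("250m")
	fmt.Println("cpu too large:", cpuReq.MilliValue() > cpuAvail.MilliValue()) // true

	// Memory: 128Mi requested vs 1Gi available; milli-units keep the same ordering
	// as plain values for quantities that fit in an int64.
	memReq, memAvail := resource.MustParse("128Mi"), resource.MustParse("1Gi")
	fmt.Println("memory too large:", memReq.MilliValue() > memAvail.MilliValue()) // false
}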
@@ -231,18 +223,34 @@ func FitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nod
 	return podFitsOnNode, insufficientResources
 }
 
-// NodeAvailableResources returns resources mapped to the quanitity available on the node.
-func NodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
+// nodeAvailableResources returns resources mapped to the quanitity available on the node.
+func nodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
 	podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
 	if err != nil {
 		return nil, err
 	}
-	aggregatePodRequests := AggregatePodRequests(podsOnNode, resourceNames)
-	return nodeRemainingResources(node, aggregatePodRequests, resourceNames), nil
+	nodeUtilization := NodeUtilization(podsOnNode, resourceNames)
+	remainingResources := map[v1.ResourceName]*resource.Quantity{
+		v1.ResourceCPU:    resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-nodeUtilization[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
+		v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-nodeUtilization[v1.ResourceMemory].Value(), resource.BinarySI),
+		v1.ResourcePods:   resource.NewQuantity(node.Status.Allocatable.Pods().Value()-nodeUtilization[v1.ResourcePods].Value(), resource.DecimalSI),
+	}
+	for _, name := range resourceNames {
+		if !IsBasicResource(name) {
+			if _, exists := node.Status.Allocatable[name]; exists {
+				allocatableResource := node.Status.Allocatable[name]
+				remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-nodeUtilization[name].Value(), resource.DecimalSI)
+			} else {
+				remainingResources[name] = resource.NewQuantity(0, resource.DecimalSI)
+			}
+		}
+	}
+
+	return remainingResources, nil
 }
 
-// AggregatePodRequests returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
-func AggregatePodRequests(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
+// NodeUtilization returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
+func NodeUtilization(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
 	totalReqs := map[v1.ResourceName]*resource.Quantity{
 		v1.ResourceCPU:    resource.NewMilliQuantity(0, resource.DecimalSI),
 		v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
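For context, the arithmetic behind the new nodeAvailableResources body is simply "allocatable minus the node's summed pod requests", per resource name. A standalone sketch with made-up values (not repository code):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	allocatable := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("2"),
		v1.ResourceMemory: resource.MustParse("4Gi"),
	}
	// Pretend these are the summed requests of all pods already on the node.
	requested := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("1500m"),
		v1.ResourceMemory: resource.MustParse("3Gi"),
	}

	remaining := map[v1.ResourceName]*resource.Quantity{}
	for name, alloc := range allocatable {
		r := alloc.DeepCopy()
		req := requested[name]
		r.Sub(req) // remaining = allocatable - requested
		remaining[name] = &r
	}
	fmt.Println("remaining cpu:", remaining[v1.ResourceCPU].String())       // 500m
	fmt.Println("remaining memory:", remaining[v1.ResourceMemory].String()) // 1Gi
}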
@@ -269,22 +277,6 @@ func AggregatePodRequests(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v
 	return totalReqs
 }
 
-func nodeRemainingResources(node *v1.Node, aggregatePodRequests map[v1.ResourceName]*resource.Quantity, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
-	remainingResources := map[v1.ResourceName]*resource.Quantity{
-		v1.ResourceCPU:    resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-aggregatePodRequests[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
-		v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-aggregatePodRequests[v1.ResourceMemory].Value(), resource.BinarySI),
-		v1.ResourcePods:   resource.NewQuantity(node.Status.Allocatable.Pods().Value()-aggregatePodRequests[v1.ResourcePods].Value(), resource.DecimalSI),
-	}
-	for _, name := range resourceNames {
-		if !IsBasicResource(name) {
-			allocatableResource := node.Status.Allocatable[name]
-			remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-aggregatePodRequests[name].Value(), resource.DecimalSI)
-		}
-	}
-
-	return remainingResources
-}
-
 // IsBasicResource checks if resource is basic native.
 func IsBasicResource(name v1.ResourceName) bool {
 	switch name {
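As a reminder of what "basic" means here, a standalone sketch that mirrors the intent of IsBasicResource (it is an assumption-labelled illustration, not the repository's implementation): CPU, memory and pods are basic, while extended resources such as GPUs take the allocatable-lookup branch added above.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// isBasicResource is an illustrative stand-in for IsBasicResource in the diff above.
func isBasicResource(name v1.ResourceName) bool {
	switch name {
	case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
		return true
	default:
		return false
	}
}

func main() {
	fmt.Println(isBasicResource(v1.ResourceCPU))                    // true
	fmt.Println(isBasicResource(v1.ResourceName("nvidia.com/gpu"))) // false: extended resource
}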
@@ -26,7 +26,6 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
-	core "k8s.io/client-go/testing"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
 	"sigs.k8s.io/descheduler/test"
 )
@@ -196,11 +195,6 @@ func TestPodFitsCurrentNode(t *testing.T) {
 		},
 	}
 
-	fakeClient := &fake.Clientset{}
-	fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
-		return true, &v1.PodList{Items: nil}, nil
-	})
-
 	for _, tc := range tests {
 		t.Run(tc.description, func(t *testing.T) {
 			ctx, cancel := context.WithCancel(context.Background())
@@ -70,13 +70,17 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
 			Unschedulable: true,
 		}
 	})
-	node5 := test.BuildTestNode("n5", 1, 1, 1, nil)
 
 	node5 := test.BuildTestNode("n5", 2000, 3000, 10, nil)
 	node5.Spec.Taints = []v1.Taint{
 		createPreferNoScheduleTaint("testTaint", "test", 1),
 	}
+
+	node6 := test.BuildTestNode("n6", 1, 1, 1, nil)
+	node6.Spec.Taints = []v1.Taint{
+		createPreferNoScheduleTaint("testTaint", "test", 1),
+	}
 
 	p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
 	p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
 	p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
@@ -293,7 +297,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
 		{
 			description: "Critical and non critical pods, pods not tolerating node taint can't be evicted because the only available node does not have enough resources.",
 			pods: []*v1.Pod{p2, p7, p9, p10},
-			nodes: []*v1.Node{node1, node5},
+			nodes: []*v1.Node{node1, node6},
 			evictLocalStoragePods: false,
 			evictSystemCriticalPods: true,
 			expectedEvictedPodCount: 0, //p2 and p7 can't be evicted
@@ -695,112 +695,6 @@ func TestLowNodeUtilization(t *testing.T) {
 			expectedPodsEvicted: 2,
 			evictedPods: []string{},
 		},
-		{
-			name: "without priorities, but only other node doesn't match pod node affinity for p4 and p5",
-			thresholds: api.ResourceThresholds{
-				v1.ResourceCPU: 30,
-				v1.ResourcePods: 30,
-			},
-			targetThresholds: api.ResourceThresholds{
-				v1.ResourceCPU: 50,
-				v1.ResourcePods: 50,
-			},
-			nodes: []*v1.Node{
-				test.BuildTestNode(n1NodeName, 500, 200, 9, nil),
-				test.BuildTestNode(n2NodeName, 200, 200, 10, func(node *v1.Node) {
-					node.ObjectMeta.Labels = map[string]string{
-						nodeSelectorKey: notMatchingNodeSelectorValue,
-					}
-				}),
-			},
-			pods: []*v1.Pod{
-				test.BuildTestPod("p1", 10, 0, n1NodeName, test.SetRSOwnerRef),
-				test.BuildTestPod("p2", 10, 0, n1NodeName, test.SetRSOwnerRef),
-				test.BuildTestPod("p3", 10, 0, n1NodeName, test.SetRSOwnerRef),
-				// These won't be evicted.
-				test.BuildTestPod("p4", 400, 100, n1NodeName, func(pod *v1.Pod) {
-					// A pod that requests too much CPU
-					test.SetNormalOwnerRef(pod)
-				}),
-				test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
-					// A pod with local storage.
-					test.SetNormalOwnerRef(pod)
-					pod.Spec.Volumes = []v1.Volume{
-						{
-							Name: "sample",
-							VolumeSource: v1.VolumeSource{
-								HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
-								EmptyDir: &v1.EmptyDirVolumeSource{
-									SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
-							},
-						},
-					}
-					// A Mirror Pod.
-					pod.Annotations = test.GetMirrorPodAnnotation()
-				}),
-				test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
-					// A Critical Pod.
-					pod.Namespace = "kube-system"
-					priority := utils.SystemCriticalPriority
-					pod.Spec.Priority = &priority
-				}),
-				test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
-			},
-			expectedPodsEvicted: 3,
-		},
-		{
-			name: "without priorities, but only other node doesn't match pod node affinity for p4 and p5",
-			thresholds: api.ResourceThresholds{
-				v1.ResourceCPU: 30,
-				v1.ResourcePods: 30,
-			},
-			targetThresholds: api.ResourceThresholds{
-				v1.ResourceCPU: 50,
-				v1.ResourcePods: 50,
-			},
-			nodes: []*v1.Node{
-				test.BuildTestNode(n1NodeName, 500, 200, 9, nil),
-				test.BuildTestNode(n2NodeName, 200, 200, 10, func(node *v1.Node) {
-					node.ObjectMeta.Labels = map[string]string{
-						nodeSelectorKey: notMatchingNodeSelectorValue,
-					}
-				}),
-			},
-			pods: []*v1.Pod{
-				test.BuildTestPod("p1", 10, 0, n1NodeName, test.SetRSOwnerRef),
-				test.BuildTestPod("p2", 10, 0, n1NodeName, test.SetRSOwnerRef),
-				test.BuildTestPod("p3", 10, 0, n1NodeName, test.SetRSOwnerRef),
-				// These won't be evicted.
-				test.BuildTestPod("p4", 400, 100, n1NodeName, func(pod *v1.Pod) {
-					// A pod that requests too much CPU
-					test.SetNormalOwnerRef(pod)
-				}),
-				test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
-					// A pod with local storage.
-					test.SetNormalOwnerRef(pod)
-					pod.Spec.Volumes = []v1.Volume{
-						{
-							Name: "sample",
-							VolumeSource: v1.VolumeSource{
-								HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
-								EmptyDir: &v1.EmptyDirVolumeSource{
-									SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
-							},
-						},
-					}
-					// A Mirror Pod.
-					pod.Annotations = test.GetMirrorPodAnnotation()
-				}),
-				test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
-					// A Critical Pod.
-					pod.Namespace = "kube-system"
-					priority := utils.SystemCriticalPriority
-					pod.Spec.Priority = &priority
-				}),
-				test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
-			},
-			expectedPodsEvicted: 3,
-		},
 	}
 
 	for _, test := range testCases {
@@ -157,7 +157,7 @@ func getNodeUsage(
 
 		nodeUsageList = append(nodeUsageList, NodeUsage{
 			node: node,
-			usage: nodeUtilization(node, pods, resourceNames),
+			usage: nodeutil.NodeUtilization(pods, resourceNames),
 			allPods: pods,
 		})
 	}
@@ -279,7 +279,7 @@ func evictPodsFromSourceNodes(
 	for _, node := range sourceNodes {
 		klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
 
-		nonRemovablePods, removablePods := classifyPods(ctx, node.allPods, podFilter)
+		nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
 		klog.V(2).InfoS("Pods on node", "node", klog.KObj(node.node), "allPods", len(node.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods))
 
 		if len(removablePods) == 0 {
@@ -413,7 +413,7 @@ func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName {
 	return resourceNames
 }
 
-func classifyPods(ctx context.Context, pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
+func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
 	var nonRemovablePods, removablePods []*v1.Pod
 
 	for _, pod := range pods {
@@ -437,7 +437,7 @@ func averageNodeBasicresources(nodes []*v1.Node, getPodsAssignedToNode podutil.G
 			numberOfNodes--
 			continue
 		}
-		usage := nodeUtilization(node, pods, resourceNames)
+		usage := nodeutil.NodeUtilization(pods, resourceNames)
 		nodeCapacity := node.Status.Capacity
 		if len(node.Status.Allocatable) > 0 {
 			nodeCapacity = node.Status.Allocatable
@@ -168,7 +168,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
 			klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
 			continue
 		}
-		balanceDomains(ctx, client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodes)
+		balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodes)
 	}
 }
 
@@ -223,7 +223,6 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
 // [5, 5, 5, 5, 5, 5]
 // (assuming even distribution by the scheduler of the evicted pods)
 func balanceDomains(
-	ctx context.Context,
 	client clientset.Interface,
 	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
 	podsForEviction map[*v1.Pod]struct{},
@@ -234,7 +233,7 @@ func balanceDomains(
 	nodes []*v1.Node) {
 
 	idealAvg := sumPods / float64(len(constraintTopologies))
-	sortedDomains := sortDomains(ctx, constraintTopologies, isEvictable)
+	sortedDomains := sortDomains(constraintTopologies, isEvictable)
 	// i is the index for belowOrEqualAvg
 	// j is the index for aboveAvg
 	i := 0
@@ -274,6 +273,17 @@ func balanceDomains(
 		// also (just for tracking), add them to the list of pods in the lower topology
 		aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:]
 		for k := range aboveToEvict {
+			// PodFitsAnyOtherNode excludes the current node because, for the sake of domain balancing only, we care about if there is any other
+			// place it could theoretically fit.
+			// If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading
+			// Also, if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node,
+			// don't bother evicting it as it will just end up back on the same node
+			// however we still account for it "being evicted" so the algorithm can complete
+			// TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod
+			// but still try to get the required # of movePods (instead of just chopping that value off the slice above).
+			// In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes.
+			// This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm.
+			// So, a better selection heuristic could improve performance.
 			if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
 				klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
 				continue
@@ -293,7 +303,7 @@
 // 3. pods in descending priority
 // 4. all other pods
 // We then pop pods off the back of the list for eviction
-func sortDomains(ctx context.Context, constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(pod *v1.Pod) bool) []topology {
+func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(pod *v1.Pod) bool) []topology {
 	sortedTopologies := make([]topology, 0, len(constraintTopologyPairs))
 	// sort the topologies and return 2 lists: those <= the average and those > the average (> list inverted)
 	for pair, list := range constraintTopologyPairs {
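The comment block added to balanceDomains above describes a "skip but still count" rule: a pod that fits nowhere else is not evicted, yet it still counts toward the domain's move target so the balancing loop can finish. A tiny standalone illustration of that trade-off (invented names and data, not repository code):

package main

import "fmt"

func main() {
	candidates := []string{"p1", "p2", "p3"}
	fitsElsewhere := map[string]bool{"p1": true, "p2": false, "p3": true}

	var evicted []string
	moved := 0 // what the balancing math sees
	for _, p := range candidates {
		moved++ // counted toward the domain's target even if skipped below
		if !fitsElsewhere[p] {
			continue // immovable pod: skipped, which is why the spreading can under-evict
		}
		evicted = append(evicted, p)
	}
	fmt.Println("counted as moved:", moved, "actually evicted:", evicted) // 3 vs [p1 p3]
}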