mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-28 14:41:10 +01:00

NodeFit parameter now considers pod requests

This commit is contained in:
RyanDevlin
2022-01-12 11:49:01 -05:00
committed by Jan Chaloupka
parent eac3b4b54a
commit 16eb9063b6
27 changed files with 1497 additions and 648 deletions
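At a high level, the change can be summarized by the sketch below (not the committed code, which lives in pkg/descheduler/node further down in this diff): a pod only "fits" another node if every resource it requests is still available after subtracting the requests of the pods already assigned to that node.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// fits is an illustrative stand-in for the FitsRequest/NodeAvailableResources
// pair added by this commit: it reports whether every requested quantity is
// covered by what remains allocatable on a candidate node.
func fits(podRequests, nodeAvailable v1.ResourceList) bool {
	for name, req := range podRequests {
		avail := nodeAvailable[name]
		if req.Cmp(avail) > 0 { // request exceeds what is left on the node
			return false
		}
	}
	return true
}

func main() {
	podRequests := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("500m"),
		v1.ResourceMemory: resource.MustParse("1Gi"),
	}
	nodeAvailable := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("250m"),
		v1.ResourceMemory: resource.MustParse("4Gi"),
	}
	fmt.Println(fits(podRequests, nodeAvailable)) // false: only 250m CPU left
}
```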


@@ -739,6 +739,7 @@ The following strategies accept a `nodeFit` boolean parameter which can optimize
- A `nodeSelector` on the pod
- Any `Tolerations` on the pod and any `Taints` on the other nodes
- `nodeAffinity` on the pod
- Resource `Requests` made by the pod and the resources available on other nodes
- Whether any of the other nodes are marked as `unschedulable`
E.g.


@@ -283,6 +283,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
nodes,
getPodsAssignedToNode,
evictLocalStoragePods,
evictSystemCriticalPods,
ignorePvcPods,
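For context, a minimal sketch (assuming the informer-based helper podutil.BuildGetPodsAssignedToNodeFunc exercised by the tests later in this commit) of how a caller could build the getPodsAssignedToNode indexer that is now passed into NewPodEvictor; the function name here is illustrative, not part of the commit.

```go
package example

import (
	"context"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// buildNodeIndexer wires a pod informer into the GetPodsAssignedToNodeFunc
// that the evictor now requires. The function name is illustrative.
func buildNodeIndexer(ctx context.Context, client kubernetes.Interface) (podutil.GetPodsAssignedToNodeFunc, error) {
	factory := informers.NewSharedInformerFactory(client, 0)
	podInformer := factory.Core().V1().Pods()
	// Registers a node-name index on the pod informer so that the pods on a
	// given node can be listed without a cluster-wide list call.
	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
	if err != nil {
		return nil, err
	}
	factory.Start(ctx.Done())
	factory.WaitForCacheSync(ctx.Done())
	return getPodsAssignedToNode, nil
}
```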


@@ -51,6 +51,7 @@ type namespacePodEvictCount map[string]uint
type PodEvictor struct {
client clientset.Interface
nodes []*v1.Node
nodeIndexer podutil.GetPodsAssignedToNodeFunc
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
@@ -71,6 +72,7 @@ func NewPodEvictor(
maxPodsToEvictPerNode *uint,
maxPodsToEvictPerNamespace *uint,
nodes []*v1.Node,
nodeIndexer podutil.GetPodsAssignedToNodeFunc,
evictLocalStoragePods bool,
evictSystemCriticalPods bool,
ignorePvcPods bool,
@@ -87,6 +89,7 @@ func NewPodEvictor(
return &PodEvictor{
client: client,
nodes: nodes,
nodeIndexer: nodeIndexer,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
@@ -296,7 +299,7 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
}
if options.nodeFit {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if !nodeutil.PodFitsAnyOtherNode(pe.nodeIndexer, pod, pe.nodes) {
return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable")
}
return nil
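A short sketch of how a strategy consumes this constraint; WithNodeFit, Evictable and IsEvictable appear elsewhere in this diff, while the helper name and loop are illustrative.

```go
package example

import (
	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
)

// filterEvictable keeps only the pods the evictor would actually evict once
// the nodeFit constraint is enabled. The helper name is illustrative.
func filterEvictable(podEvictor *evictions.PodEvictor, candidates []*v1.Pod) []*v1.Pod {
	// With nodeFit enabled, IsEvictable also runs PodFitsAnyOtherNode, which
	// now accounts for the pod's resource requests.
	evictable := podEvictor.Evictable(evictions.WithNodeFit(true))

	var out []*v1.Pod
	for _, pod := range candidates {
		if evictable.IsEvictable(pod) {
			out = append(out, pod)
		}
	}
	return out
}
```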


@@ -21,8 +21,10 @@ import (
"testing" "testing"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing" core "k8s.io/client-go/testing"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
@@ -80,9 +82,9 @@ func TestIsEvictable(t *testing.T) {
nodeLabelKey := "datacenter" nodeLabelKey := "datacenter"
nodeLabelValue := "east" nodeLabelValue := "east"
type testCase struct { type testCase struct {
pod *v1.Pod description string
pods []*v1.Pod
nodes []*v1.Node nodes []*v1.Node
runBefore func(*v1.Pod, []*v1.Node)
evictFailedBarePods bool evictFailedBarePods bool
evictLocalStoragePods bool evictLocalStoragePods bool
evictSystemCriticalPods bool evictSystemCriticalPods bool
@@ -92,80 +94,96 @@ func TestIsEvictable(t *testing.T) {
} }
testCases := []testCase{ testCases := []testCase{
{ // Failed pod eviction with no ownerRefs. {
pod: test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, nil), description: "Failed pod eviction with no ownerRefs",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Status.Phase = v1.PodFailed pod.Status.Phase = v1.PodFailed
}),
}, },
evictFailedBarePods: false, evictFailedBarePods: false,
result: false, result: false,
}, { // Normal pod eviction with no ownerRefs and evictFailedBarePods enabled }, {
pod: test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil), description: "Normal pod eviction with no ownerRefs and evictFailedBarePods enabled",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil)},
},
evictFailedBarePods: true, evictFailedBarePods: true,
result: false, result: false,
}, { // Failed pod eviction with no ownerRefs }, {
pod: test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, nil), description: "Failed pod eviction with no ownerRefs",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Status.Phase = v1.PodFailed pod.Status.Phase = v1.PodFailed
}),
}, },
evictFailedBarePods: true, evictFailedBarePods: true,
result: true, result: true,
}, { // Normal pod eviction with normal ownerRefs }, {
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), description: "Normal pod eviction with normal ownerRefs",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil), description: "Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Normal pod eviction with replicaSet ownerRefs }, {
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil), description: "Normal pod eviction with replicaSet ownerRefs",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil), description: "Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p4", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Normal pod eviction with statefulSet ownerRefs }, {
pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil), description: "Normal pod eviction with statefulSet ownerRefs",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() test.BuildTestPod("p18", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil), description: "Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p19", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false }, {
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil), description: "Pod not evicted because it is bound to a PV and evictLocalStoragePods = false",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p5", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{ pod.Spec.Volumes = []v1.Volume{
{ {
@@ -177,13 +195,15 @@ func TestIsEvictable(t *testing.T) {
}, },
}, },
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: false, result: false,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true }, {
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil), description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = true",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p6", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{ pod.Spec.Volumes = []v1.Volume{
{ {
@@ -195,13 +215,15 @@ func TestIsEvictable(t *testing.T) {
}, },
}, },
} }
}),
}, },
evictLocalStoragePods: true, evictLocalStoragePods: true,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil), description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p7", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{ pod.Spec.Volumes = []v1.Volume{
@@ -214,139 +236,167 @@ func TestIsEvictable(t *testing.T) {
}, },
}, },
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Pod not evicted because it is part of a daemonSet }, {
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil), description: "Pod not evicted because it is part of a daemonSet",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: false, result: false,
}, { // Pod is evicted because it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil), description: "Pod is evicted because it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Pod not evicted because it is a mirror pod }, {
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil), description: "Pod not evicted because it is a mirror pod",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p10", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation() pod.Annotations = test.GetMirrorPodAnnotation()
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: false, result: false,
}, { // Pod is evicted because it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil), description: "Pod is evicted because it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation() pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Pod not evicted because it has system critical priority }, {
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil), description: "Pod not evicted because it has system critical priority",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority pod.Spec.Priority = &priority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: false, result: false,
}, { // Pod is evicted because it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil), description: "Pod is evicted because it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p13", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority pod.Spec.Priority = &priority
pod.Annotations = map[string]string{ pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true", "descheduler.alpha.kubernetes.io/evict": "true",
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
result: true, result: true,
}, { // Pod not evicted because it has a priority higher than the configured priority threshold }, {
pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil), description: "Pod not evicted because it has a priority higher than the configured priority threshold",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p14", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority pod.Spec.Priority = &highPriority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
priorityThreshold: &lowPriority, priorityThreshold: &lowPriority,
result: false, result: false,
}, { // Pod is evicted because it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil), description: "Pod is evicted because it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p15", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority pod.Spec.Priority = &highPriority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
priorityThreshold: &lowPriority, priorityThreshold: &lowPriority,
result: true, result: true,
}, { // Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true }, {
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil), description: "Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority pod.Spec.Priority = &priority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: true, evictSystemCriticalPods: true,
result: true, result: true,
}, { // Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil), description: "Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
priority := utils.SystemCriticalPriority priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority pod.Spec.Priority = &priority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: true, evictSystemCriticalPods: true,
result: true, result: true,
}, { // Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true }, {
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil), description: "Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority pod.Spec.Priority = &highPriority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: true, evictSystemCriticalPods: true,
priorityThreshold: &lowPriority, priorityThreshold: &lowPriority,
result: true, result: true,
}, { // Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation }, {
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil), description: "Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { pods: []*v1.Pod{
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority pod.Spec.Priority = &highPriority
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: true, evictSystemCriticalPods: true,
priorityThreshold: &lowPriority, priorityThreshold: &lowPriority,
result: true, result: true,
}, { // Pod with no tolerations running on normal node, all other nodes tainted }, {
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), description: "Pod with no tolerations running on normal node, all other nodes tainted",
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, pods: []*v1.Pod{
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
for _, node := range nodes { },
nodes: []*v1.Node{
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{ node.Spec.Taints = []v1.Taint{
{ {
Key: nodeTaintKey, Key: nodeTaintKey,
@@ -354,14 +404,26 @@ func TestIsEvictable(t *testing.T) {
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}, },
} }
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
nodeFit: true, nodeFit: true,
result: false, result: false,
}, { // Pod with correct tolerations running on normal node, all other nodes tainted }, {
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { description: "Pod with correct tolerations running on normal node, all other nodes tainted",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Tolerations = []v1.Toleration{ pod.Spec.Tolerations = []v1.Toleration{
{ {
Key: nodeTaintKey, Key: nodeTaintKey,
@@ -370,11 +432,9 @@ func TestIsEvictable(t *testing.T) {
}, },
} }
}), }),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, },
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { nodes: []*v1.Node{
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
for _, node := range nodes {
node.Spec.Taints = []v1.Taint{ node.Spec.Taints = []v1.Taint{
{ {
Key: nodeTaintKey, Key: nodeTaintKey,
@@ -382,67 +442,216 @@ func TestIsEvictable(t *testing.T) {
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}, },
} }
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
nodeFit: true, nodeFit: true,
result: true, result: true,
}, { // Pod with incorrect node selector }, {
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { description: "Pod with incorrect node selector",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{ pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: "fail", nodeLabelKey: "fail",
} }
}), }),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, },
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { nodes: []*v1.Node{
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{ node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
} }
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
nodeFit: true, nodeFit: true,
result: false, result: false,
}, { // Pod with correct node selector }, {
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { description: "Pod with correct node selector",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{ pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
} }
}), }),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, },
runBefore: func(pod *v1.Pod, nodes []*v1.Node) { nodes: []*v1.Node{
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{ node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
} }
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
} }
}),
}, },
evictLocalStoragePods: false, evictLocalStoragePods: false,
evictSystemCriticalPods: false, evictSystemCriticalPods: false,
nodeFit: true, nodeFit: true,
result: true, result: true,
}, {
description: "Pod with correct node selector, but only available node doesn't have enough CPU",
pods: []*v1.Pod{
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2-TEST", 10, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3-TEST", 10, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, {
description: "Pod with correct node selector, and one node has enough memory",
pods: []*v1.Pod{
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestPod("node2-pod-10GB-mem", 20, 10, "node2", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
test.BuildTestPod("node3-pod-10GB-mem", 20, 10, "node3", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3", 100, 20, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, {
description: "Pod with correct node selector, but both nodes don't have enough memory",
pods: []*v1.Pod{
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestPod("node2-pod-10GB-mem", 10, 10, "node2", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
test.BuildTestPod("node3-pod-10GB-mem", 10, 10, "node3", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3", 100, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, },
} }
for _, test := range testCases { for _, test := range testCases {
test.runBefore(test.pod, test.nodes)
t.Run(test.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
nodes := append(test.nodes, n1) nodes := append(test.nodes, n1)
var objs []runtime.Object
for _, node := range test.nodes {
objs = append(objs, node)
}
for _, pod := range test.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := &PodEvictor{ podEvictor := &PodEvictor{
client: fakeClient,
nodes: nodes,
nodeIndexer: getPodsAssignedToNode,
policyGroupVersion: policyv1.SchemeGroupVersion.String(),
dryRun: false,
maxPodsToEvictPerNode: nil,
maxPodsToEvictPerNamespace: nil,
evictLocalStoragePods: test.evictLocalStoragePods, evictLocalStoragePods: test.evictLocalStoragePods,
evictSystemCriticalPods: test.evictSystemCriticalPods, evictSystemCriticalPods: test.evictSystemCriticalPods,
evictFailedBarePods: test.evictFailedBarePods, evictFailedBarePods: test.evictFailedBarePods,
nodes: nodes,
} }
evictable := podEvictor.Evictable()
var opts []func(opts *Options) var opts []func(opts *Options)
if test.priorityThreshold != nil { if test.priorityThreshold != nil {
opts = append(opts, WithPriorityThreshold(*test.priorityThreshold)) opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
@@ -450,13 +659,13 @@ func TestIsEvictable(t *testing.T) {
if test.nodeFit { if test.nodeFit {
opts = append(opts, WithNodeFit(true)) opts = append(opts, WithNodeFit(true))
} }
evictable = podEvictor.Evictable(opts...) evictable := podEvictor.Evictable(opts...)
result := evictable.IsEvictable(test.pod) result := evictable.IsEvictable(test.pods[0])
if result != test.result { if result != test.result {
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result) t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pods[0].Name, test.result, result)
} }
})
} }
} }
func TestPodTypes(t *testing.T) { func TestPodTypes(t *testing.T) {


@@ -18,13 +18,16 @@ package node
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
@@ -96,32 +99,91 @@ func IsReady(node *v1.Node) bool {
return true
}
// PodFitsAnyOtherNode checks if the given pod fits any of the given nodes, besides the node // NodeFit returns true if the provided pod can probably be scheduled onto the provided node.
// the pod is already running on. The node fit is based on multiple criteria, like, pod node selector // This function is used when the NodeFit pod filtering feature of the Descheduler is enabled.
// matching the node label (including affinity), the taints on the node, and the node being schedulable or not. // This function currently considers a subset of the Kubernetes Scheduler's predicates when
func PodFitsAnyOtherNode(pod *v1.Pod, nodes []*v1.Node) bool { // deciding if a pod would fit on a node, but more predicates may be added in the future.
func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error {
for _, node := range nodes {
// Skip node pod is already on
if node.Name == pod.Spec.NodeName {
continue
}
// Check node selector and required affinity // Check node selector and required affinity
var errors []error
ok, err := utils.PodMatchNodeSelector(pod, node) ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil || !ok { if err != nil {
continue errors = append(errors, err)
} else if !ok {
errors = append(errors, fmt.Errorf("pod node selector does not match the node label"))
} }
// Check taints (we only care about NoSchedule and NoExecute taints) // Check taints (we only care about NoSchedule and NoExecute taints)
ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool { ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
}) })
if !ok { if !ok {
continue errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node"))
}
// Check if the pod can fit on a node based on its requests
ok, reqErrors := FitsRequest(nodeIndexer, pod, node)
if !ok {
errors = append(errors, reqErrors...)
} }
// Check if node is schedulable // Check if node is schedulable
if !IsNodeUnschedulable(node) { if IsNodeUnschedulable(node) {
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node)) errors = append(errors, fmt.Errorf("node is not schedulable"))
}
return errors
}
// PodFitsAnyOtherNode checks if the given pod will probably fit any of the given nodes, besides the node
// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
// Skip node pod is already on
if node.Name == pod.Spec.NodeName {
continue
}
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
}
return false
}
// PodFitsAnyNode checks if the given pod will probably fit any of the given nodes. The predicates used
// to determine if the pod will fit can be found in the NodeFit function.
func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
}
return false
}
// PodFitsCurrentNode checks if the given pod will probably fit onto the given node. The predicates used
// to determine if the pod will fit can be found in the NodeFit function.
func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
} }
} }
return false return false
@@ -133,39 +195,102 @@ func IsNodeUnschedulable(node *v1.Node) bool {
return node.Spec.Unschedulable
}
// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on // FitsRequest determines if a pod can fit on a node based on that pod's requests. It returns true if
// multiple criteria, like, pod node selector matching the node label, node // the pod will fit.
// being schedulable or not. func FitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool { var insufficientResources []error
for _, node := range nodes {
ok, err := utils.PodMatchNodeSelector(pod, node) // Get pod requests
if err != nil || !ok { podRequests, _ := utils.PodRequestsAndLimits(pod)
continue resourceNames := make([]v1.ResourceName, 0, len(podRequests))
for name := range podRequests {
resourceNames = append(resourceNames, name)
} }
if !IsNodeUnschedulable(node) {
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}
}
return false
}
// PodFitsCurrentNode checks if the given pod fits on the given node if the pod
// node selector matches the node label.
func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
ok, err := utils.PodMatchNodeSelector(pod, node)
availableResources, err := NodeAvailableResources(nodeIndexer, node, resourceNames)
if err != nil { if err != nil {
klog.ErrorS(err, "Failed to match node selector") return false, []error{err}
return false
} }
if !ok { podFitsOnNode := true
klog.V(2).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) for _, resource := range resourceNames {
return false podResourceRequest := podRequests[resource]
var requestTooLarge bool
switch resource {
case v1.ResourceCPU:
requestTooLarge = podResourceRequest.MilliValue() > availableResources[resource].MilliValue()
default:
requestTooLarge = podResourceRequest.Value() > availableResources[resource].Value()
} }
klog.V(2).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) if requestTooLarge {
return true insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource))
podFitsOnNode = false
}
}
return podFitsOnNode, insufficientResources
}
// NodeAvailableResources returns resources mapped to the quantity available on the node.
func NodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
if err != nil {
return nil, err
}
aggregatePodRequests := AggregatePodRequests(podsOnNode, resourceNames)
return nodeRemainingResources(node, aggregatePodRequests, resourceNames), nil
}
// AggregatePodRequests returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
func AggregatePodRequests(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
}
for _, name := range resourceNames {
if !IsBasicResource(name) {
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for _, name := range resourceNames {
quantity, ok := req[name]
if ok && name != v1.ResourcePods {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
return totalReqs
}
func nodeRemainingResources(node *v1.Node, aggregatePodRequests map[v1.ResourceName]*resource.Quantity, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
remainingResources := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-aggregatePodRequests[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-aggregatePodRequests[v1.ResourceMemory].Value(), resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(node.Status.Allocatable.Pods().Value()-aggregatePodRequests[v1.ResourcePods].Value(), resource.DecimalSI),
}
for _, name := range resourceNames {
if !IsBasicResource(name) {
allocatableResource := node.Status.Allocatable[name]
remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-aggregatePodRequests[name].Value(), resource.DecimalSI)
}
}
return remainingResources
}
// IsBasicResource checks if resource is basic native.
func IsBasicResource(name v1.ResourceName) bool {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
return true
default:
return false
}
} }
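To make the new request-aware check concrete, here is a small standalone sketch that drives FitsRequest through a fake client, mirroring the informer setup used by the tests below; the node and pod sizes are made up, and the expected output assumes the usual semantics of the test helpers.

```go
package main

import (
	"context"
	"fmt"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/test"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// A 2-core node already running a pod that requests 1.5 cores, plus a
	// candidate pod requesting 1 core: FitsRequest should report a CPU shortfall.
	node := test.BuildTestNode("node-a", 2000, 8*1000*1000*1000, 10, nil)
	running := test.BuildTestPod("already-there", 1500, 1*1000*1000*1000, "node-a", nil)
	candidate := test.BuildTestPod("candidate", 1000, 1*1000*1000*1000, "node-b", nil)

	fakeClient := fake.NewSimpleClientset(node, running, candidate)
	factory := informers.NewSharedInformerFactory(fakeClient, 0)
	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(factory.Core().V1().Pods())
	if err != nil {
		panic(err)
	}
	factory.Start(ctx.Done())
	factory.WaitForCacheSync(ctx.Done())

	ok, errs := nodeutil.FitsRequest(getPodsAssignedToNode, candidate, node)
	fmt.Println(ok, errs) // expected: false, with an "insufficient cpu" error
}
```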


@@ -21,9 +21,13 @@ import (
"testing" "testing"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers" "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test" "sigs.k8s.io/descheduler/test"
) )
@@ -147,13 +151,13 @@ func TestPodFitsCurrentNode(t *testing.T) {
}, },
}, },
}, },
node: &v1.Node{ node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
ObjectMeta: metav1.ObjectMeta{ node.ObjectMeta.Labels = map[string]string{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}, }
},
}, node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
success: true, success: true,
}, },
{ {
@@ -181,27 +185,53 @@ func TestPodFitsCurrentNode(t *testing.T) {
}, },
}, },
}, },
node: &v1.Node{ node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
ObjectMeta: metav1.ObjectMeta{ node.ObjectMeta.Labels = map[string]string{
Labels: map[string]string{
nodeLabelKey: "no", nodeLabelKey: "no",
}, }
},
}, node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
success: false, success: false,
}, },
} }
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: nil}, nil
})
for _, tc := range tests { for _, tc := range tests {
actual := PodFitsCurrentNode(tc.pod, tc.node) t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
objs = append(objs, tc.node)
objs = append(objs, tc.pod)
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
actual := PodFitsCurrentNode(getPodsAssignedToNode, tc.pod, tc.node)
if actual != tc.success { if actual != tc.success {
t.Errorf("Test %#v failed", tc.description) t.Errorf("Test %#v failed", tc.description)
} }
})
} }
} }
func TestPodFitsAnyOtherNode(t *testing.T) { func TestPodFitsAnyOtherNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode" nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes" nodeLabelValue := "yes"
nodeTaintKey := "hardware" nodeTaintKey := "hardware"
@@ -215,238 +245,527 @@ func TestPodFitsAnyOtherNode(t *testing.T) {
pod *v1.Pod pod *v1.Pod
nodes []*v1.Node nodes []*v1.Node
success bool success bool
podsOnNodes []*v1.Pod
}{ }{
{ {
description: "Pod fits another node matching node affinity", description: "Pod fits another node matching node affinity",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
nodes: []*v1.Node{ pod.Spec.NodeSelector = map[string]string{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}, }
}, }),
}, nodes: []*v1.Node{
{ test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
ObjectMeta: metav1.ObjectMeta{ node.ObjectMeta.Labels = map[string]string{
Name: nodeNames[1], nodeLabelKey: nodeLabelValue,
Labels: map[string]string{ }
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no", nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
}, podsOnNodes: []*v1.Pod{},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true, success: true,
}, },
{ {
description: "Pod expected to fit one of the nodes", description: "Pod expected to fit one of the nodes",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
nodes: []*v1.Node{ pod.Spec.NodeSelector = map[string]string{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
}, podsOnNodes: []*v1.Pod{},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true, success: true,
}, },
{ {
description: "Pod expected to fit none of the nodes", description: "Pod expected to fit none of the nodes",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{ nodes: []*v1.Node{
{ test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
ObjectMeta: metav1.ObjectMeta{ node.ObjectMeta.Labels = map[string]string{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "unfit1", nodeLabelKey: "unfit1",
}, }
},
}, node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
{ }),
ObjectMeta: metav1.ObjectMeta{ test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
Name: nodeNames[1], node.ObjectMeta.Labels = map[string]string{
Labels: map[string]string{
nodeLabelKey: "unfit2", nodeLabelKey: "unfit2",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
}, podsOnNodes: []*v1.Pod{},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false, success: false,
}, },
{ {
description: "Nodes are unschedulable but labels match, should fail", description: "Nodes are unschedulable but labels match, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
nodes: []*v1.Node{ pod.Spec.NodeSelector = map[string]string{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}, }
}, }),
Spec: v1.NodeSpec{ nodes: []*v1.Node{
Unschedulable: true, test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
}, node.ObjectMeta.Labels = map[string]string{
}, nodeLabelKey: nodeLabelValue,
{ }
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1], node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
Labels: map[string]string{ node.Spec.Unschedulable = true
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no", nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
}, podsOnNodes: []*v1.Pod{},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false, success: false,
}, },
{ {
description: "Two nodes matches node selector, one of them is tained, should pass", description: "Both nodes are tained, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
nodes: []*v1.Node{ pod.Spec.NodeSelector = map[string]string{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}, }
}, pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
Spec: v1.NodeSpec{ }),
Taints: []v1.Taint{ nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{ {
Key: nodeTaintKey, Key: nodeTaintKey,
Value: nodeTaintValue, Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}, },
}, }
}, }),
}, test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
{ node.ObjectMeta.Labels = map[string]string{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
}, },
}
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
podsOnNodes: []*v1.Pod{},
success: false,
}, },
{ {
ObjectMeta: metav1.ObjectMeta{ description: "Two nodes match node selector, one of them is tainted, there is a pod on the available node, and requests are low, should pass",
Name: nodeNames[2], pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
}, },
}
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
podsOnNodes: []*v1.Pod{
test.BuildTestPod("test-pod", 12*1000, 20*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(40*1000*1000*1000, resource.DecimalSI)
}),
}, },
success: true, success: true,
}, },
{ {
description: "Both nodes are tained, should fail", description: "Two nodes matches node selector, one of them is tained, but CPU requests are too big, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
nodes: []*v1.Node{ pod.Spec.NodeSelector = map[string]string{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}, }
}, pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
Spec: v1.NodeSpec{ }),
Taints: []v1.Taint{ nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{ {
Key: nodeTaintKey, Key: nodeTaintKey,
Value: nodeTaintValue, Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}, },
}
}),
// Notice that this node only has 4 cores, the pod already on the node below requests 3 cores, and the pod above requests 2 cores
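// In other words, 3000m (pod already on the node) + 2000m (eviction candidate) = 5000m, which exceeds the node's 4000m of allocatable CPU.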
test.BuildTestNode(nodeNames[1], 4000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 3000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
}, },
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
}, },
{ {
ObjectMeta: metav1.ObjectMeta{ description: "Two nodes matches node selector, one of them is tained, but memory requests are too big, should fail",
Name: nodeNames[1], pod: test.BuildTestPod("p1", 2000, 5*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
Labels: map[string]string{ pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue, nodeLabelKey: nodeLabelValue,
}, }
}, pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
Spec: v1.NodeSpec{ }),
Taints: []v1.Taint{ nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{ {
Key: nodeTaintKey, Key: nodeTaintKey,
Value: nodeTaintValue, Value: nodeTaintValue,
Effect: v1.TaintEffectNoExecute, Effect: v1.TaintEffectNoSchedule,
}, },
}
}),
// Notice that this node only has 8GB of memory, the pod already on the node below requests 4GB, and the pod above requests 5GB
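// In other words, 4GB (pod already on the node) + 5GB (eviction candidate) = 9GB, which exceeds the node's 8GB of allocatable memory.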
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
podsOnNodes: []*v1.Pod{
test.BuildTestPod("4GB-mem-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
}, },
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
}, },
{ {
ObjectMeta: metav1.ObjectMeta{ description: "Two nodes matches node selector, one of them is tained, but ephemeral storage requests are too big, should fail",
Name: nodeNames[2], pod: test.BuildTestPod("p1", 2000, 4*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
}, },
}
}),
// Notice that this node only has 20GB of storage, the pod already on the node below requests 11GB, and the pod above requests 10GB
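// In other words, 11GB (pod already on the node) + 10GB (eviction candidate) = 21GB, which exceeds the node's 20GB of allocatable ephemeral storage.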
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(20*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
}, },
podsOnNodes: []*v1.Pod{
test.BuildTestPod("11GB-storage-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(11*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but custom resource requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
}),
// Notice that this node only has 15 of the custom resource, the pod already on the node below requests 10, and the pod above requests 10
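// In other words, 10 (pod already on the node) + 10 (eviction candidate) = 20 units of example.com/custom-resource, which exceeds the node's allocatable 15.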
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("10-custom-resource-pod", 0, 0, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, and pod Overhead is low enough, should pass",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node has 5 CPU cores; the pod below requests 2 cores plus a CPU overhead of 1 core, and the pod above requests 1 core
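// In other words, 2000m + 1000m of overhead + 1000m (eviction candidate) = 4000m, which still fits within the node's 5000m of allocatable CPU.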
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000)
}),
},
success: true,
},
{
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, but pod Overhead is too high, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 5 CPU cores; the pod below requests 2 cores plus a CPU overhead of 2 cores, and the pod above requests 2 cores
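// In other words, 2000m + 2000m of overhead + 2000m (eviction candidate) = 6000m, which exceeds the node's 5000m of allocatable CPU.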
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Overhead = createResourceList(2000, 1000*1000*1000, 1000*1000*1000)
}),
}, },
success: false, success: false,
}, },
} }
for _, tc := range tests { for _, tc := range tests {
actual := PodFitsAnyOtherNode(tc.pod, tc.nodes) t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.podsOnNodes {
objs = append(objs, pod)
}
objs = append(objs, tc.pod)
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
actual := PodFitsAnyOtherNode(getPodsAssignedToNode, tc.pod, tc.nodes)
if actual != tc.success { if actual != tc.success {
t.Errorf("Test %#v failed", tc.description) t.Errorf("Test %#v failed", tc.description)
} }
})
} }
} }
func createPodManifest(nodeName string, nodeSelectorKey string, nodeSelectorValue string) *v1.Pod { // createResourceList builds a small resource list of core resources
return (&v1.Pod{ func createResourceList(cpu int64, memory int64, ephemeralStorage int64) v1.ResourceList {
Spec: v1.PodSpec{ resourceList := make(map[v1.ResourceName]resource.Quantity)
NodeName: nodeName, resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(cpu, resource.DecimalSI)
Affinity: &v1.Affinity{ resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.DecimalSI)
NodeAffinity: &v1.NodeAffinity{ resourceList[v1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI)
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ return resourceList
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeSelectorKey,
Operator: "In",
Values: []string{
nodeSelectorValue,
},
},
},
},
},
},
},
},
},
})
} }
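For orientation, the following is a minimal, self-contained sketch of the wiring these tests exercise: build the pods-per-node indexer from a pod informer and hand it to PodFitsAnyOtherNode together with the eviction candidate and the destination nodes. It only uses calls that appear elsewhere in this commit; the fake clientset, the empty pod, and the empty node slice are placeholders for whatever a real caller already has, not the descheduler's own bootstrap code.

package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"

	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// A fake clientset keeps the sketch runnable; a real caller passes its own clientset.Interface.
	client := fake.NewSimpleClientset()
	factory := informers.NewSharedInformerFactory(client, 0)
	podInformer := factory.Core().V1().Pods()

	// The indexer maps a node name to the pods currently assigned to it, which is what
	// lets the fit check account for the requests already placed on each node.
	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
	if err != nil {
		panic(err)
	}
	factory.Start(ctx.Done())
	factory.WaitForCacheSync(ctx.Done())

	pod := &v1.Pod{}     // eviction candidate; normally taken from a lister
	var nodes []*v1.Node // candidate destination nodes; normally the cluster's ready nodes
	fmt.Println(nodeutil.PodFitsAnyOtherNode(getPodsAssignedToNode, pod, nodes))
}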

View File

@@ -67,6 +67,7 @@ func TestFindDuplicatePods(t *testing.T) {
Unschedulable: true, Unschedulable: true,
} }
}) })
node6 := test.BuildTestNode("n6", 200, 200, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p1.Namespace = "dev" p1.Namespace = "dev"
@@ -102,6 +103,14 @@ func TestFindDuplicatePods(t *testing.T) {
p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil) p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil)
p18.Namespace = "node-fit" p18.Namespace = "node-fit"
// This pod sits on node6 and is used to take up CPU requests on the node
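// node6 has 200m of CPU; with p19's 150m request only 50m remain, less than the 100m a rescheduled duplicate would need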
p19 := test.BuildTestPod("CPU-eater", 150, 150, node6.Name, nil)
p19.Namespace = "test"
// Dummy pod for node6 that, unlike p19, leaves enough CPU free for an evicted duplicate to fit
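// With p20's 100m request node6 still has 100m free, enough for a single rescheduled duplicate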
p20 := test.BuildTestPod("CPU-saver", 100, 150, node6.Name, nil)
p20.Namespace = "test"
// ### Evictable Pods ### // ### Evictable Pods ###
// Three Pods in the "default" Namespace, bound to same ReplicaSet. 2 should be evicted. // Three Pods in the "default" Namespace, bound to same ReplicaSet. 2 should be evicted.
@@ -263,6 +272,20 @@ func TestFindDuplicatePods(t *testing.T) {
expectedEvictedPodCount: 0, expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
}, },
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available does not have enough CPU, and nodeFit set to true. 0 should be evicted.",
pods: []*v1.Pod{p1, p2, p3, p19},
nodes: []*v1.Node{node1, node6},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available has enough CPU, and nodeFit set to true. 1 should be evicted.",
pods: []*v1.Pod{p1, p2, p3, p20},
nodes: []*v1.Node{node1, node6},
expectedEvictedPodCount: 1,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
} }
for _, testCase := range testCases { for _, testCase := range testCases {
@@ -297,6 +320,7 @@ func TestFindDuplicatePods(t *testing.T) {
nil, nil,
nil, nil,
testCase.nodes, testCase.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,
@@ -724,6 +748,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
nil, nil,
nil, nil,
testCase.nodes, testCase.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -166,9 +166,12 @@ func TestRemoveFailedPods(t *testing.T) {
{ {
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction", description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, nil, nil, nil, true), strategy: createStrategy(true, false, nil, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) { nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) {
node.Spec.Unschedulable = true node.Spec.Unschedulable = true
})}, }),
},
expectedEvictedPodCount: 0, expectedEvictedPodCount: 0,
pods: []*v1.Pod{ pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{ buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
@@ -176,6 +179,17 @@ func TestRemoveFailedPods(t *testing.T) {
}), nil), }), nil),
}, },
}, },
{
description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}), nil),
},
},
{ {
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction", description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false), strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
@@ -261,6 +275,7 @@ func TestRemoveFailedPods(t *testing.T) {
nil, nil,
nil, nil,
tc.nodes, tc.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -95,8 +95,8 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
getPodsAssignedToNode, getPodsAssignedToNode,
podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
return evictable.IsEvictable(pod) && return evictable.IsEvictable(pod) &&
!nodeutil.PodFitsCurrentNode(pod, node) && !nodeutil.PodFitsCurrentNode(getPodsAssignedToNode, pod, node) &&
nodeutil.PodFitsAnyNode(pod, nodes) nodeutil.PodFitsAnyNode(getPodsAssignedToNode, pod, nodes)
}), }),
) )
if err != nil { if err != nil {

View File

@@ -222,6 +222,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
tc.maxPodsToEvictPerNode, tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace, tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes, tc.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -70,6 +70,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
Unschedulable: true, Unschedulable: true,
} }
}) })
node5 := test.BuildTestNode("n5", 1, 1, 1, nil)
node5 := test.BuildTestNode("n5", 2000, 3000, 10, nil) node5 := test.BuildTestNode("n5", 2000, 3000, 10, nil)
node5.Spec.Taints = []v1.Taint{ node5.Spec.Taints = []v1.Taint{
@@ -289,6 +290,15 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
excludedTaints: []string{"testTaint1=test2"}, excludedTaints: []string{"testTaint1=test2"},
expectedEvictedPodCount: 1, // pod gets evicted, as excluded taint value does not match node1's taint value expectedEvictedPodCount: 1, // pod gets evicted, as excluded taint value does not match node1's taint value
}, },
{
description: "Critical and non critical pods, pods not tolerating node taint can't be evicted because the only available node does not have enough resources.",
pods: []*v1.Pod{p2, p7, p9, p10},
nodes: []*v1.Node{node1, node5},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
expectedEvictedPodCount: 0, //p2 and p7 can't be evicted
nodeFit: true,
},
} }
for _, tc := range tests { for _, tc := range tests {
@@ -324,6 +334,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
tc.maxPodsToEvictPerNode, tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace, tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes, tc.nodes,
getPodsAssignedToNode,
tc.evictLocalStoragePods, tc.evictLocalStoragePods,
tc.evictSystemCriticalPods, tc.evictSystemCriticalPods,
false, false,

View File

@@ -83,7 +83,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
"Pods", thresholds[v1.ResourcePods], "Pods", thresholds[v1.ResourcePods],
} }
for name := range thresholds { for name := range thresholds {
if !isBasicResource(name) { if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name])) keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
} }
} }
@@ -164,7 +164,7 @@ func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
for name := range thresholds { for name := range thresholds {
if !isBasicResource(name) { if !nodeutil.IsBasicResource(name) {
targetThresholds[name] = MaxResourcePercentage targetThresholds[name] = MaxResourcePercentage
} }
} }

View File

@@ -385,6 +385,50 @@ func TestHighNodeUtilization(t *testing.T) {
}, },
expectedPodsEvicted: 0, expectedPodsEvicted: 0,
}, },
{
name: "Other node does not have enough Memory",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
// A pod requesting more memory than is available on node1
test.SetRSOwnerRef(pod)
}),
},
expectedPodsEvicted: 0,
},
{
name: "Other node does not have enough Memory",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
// A pod requesting more memory than is available on node1
test.SetRSOwnerRef(pod)
}),
},
expectedPodsEvicted: 0,
},
} }
for _, testCase := range testCases { for _, testCase := range testCases {
@@ -463,6 +507,7 @@ func TestHighNodeUtilization(t *testing.T) {
nil, nil,
nil, nil,
testCase.nodes, testCase.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,
@@ -668,6 +713,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
&item.evictionsExpected, &item.evictionsExpected,
nil, nil,
item.nodes, item.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -111,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
"Pods", thresholds[v1.ResourcePods], "Pods", thresholds[v1.ResourcePods],
} }
for name := range thresholds { for name := range thresholds {
if !isBasicResource(name) { if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name])) keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
} }
} }
@@ -125,7 +125,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
"Pods", targetThresholds[v1.ResourcePods], "Pods", targetThresholds[v1.ResourcePods],
} }
for name := range targetThresholds { for name := range targetThresholds {
if !isBasicResource(name) { if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name])) keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name]))
} }
} }

View File

@@ -695,6 +695,112 @@ func TestLowNodeUtilization(t *testing.T) {
expectedPodsEvicted: 2, expectedPodsEvicted: 2,
evictedPods: []string{}, evictedPods: []string{},
}, },
{
name: "without priorities, but only other node doesn't match pod node affinity for p4 and p5",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 500, 200, 9, nil),
test.BuildTestNode(n2NodeName, 200, 200, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeSelectorKey: notMatchingNodeSelectorValue,
}
}),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 10, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 10, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 10, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p4", 400, 100, n1NodeName, func(pod *v1.Pod) {
// A pod that requests too much CPU
test.SetNormalOwnerRef(pod)
}),
test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 3,
},
{
name: "without priorities, but only other node doesn't match pod node affinity for p4 and p5",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 500, 200, 9, nil),
test.BuildTestNode(n2NodeName, 200, 200, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeSelectorKey: notMatchingNodeSelectorValue,
}
}),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 10, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 10, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 10, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p4", 400, 100, n1NodeName, func(pod *v1.Pod) {
// A pod that requests too much CPU
test.SetNormalOwnerRef(pod)
}),
test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 3,
},
} }
for _, test := range testCases { for _, test := range testCases {
@@ -772,6 +878,7 @@ func TestLowNodeUtilization(t *testing.T) {
nil, nil,
nil, nil,
test.nodes, test.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,
@@ -1086,6 +1193,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
&item.evictionsExpected, &item.evictionsExpected,
nil, nil,
item.nodes, item.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -27,6 +27,8 @@ import (
"sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions" "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/descheduler/node"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/pkg/utils"
) )
@@ -268,7 +270,7 @@ func evictPodsFromSourceNodes(
"Pods", totalAvailableUsage[v1.ResourcePods].Value(), "Pods", totalAvailableUsage[v1.ResourcePods].Value(),
} }
for name := range totalAvailableUsage { for name := range totalAvailableUsage {
if !isBasicResource(name) { if !node.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value()) keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
} }
} }
@@ -277,7 +279,7 @@ func evictPodsFromSourceNodes(
for _, node := range sourceNodes { for _, node := range sourceNodes {
klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage) klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter) nonRemovablePods, removablePods := classifyPods(ctx, node.allPods, podFilter)
klog.V(2).InfoS("Pods on node", "node", klog.KObj(node.node), "allPods", len(node.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods)) klog.V(2).InfoS("Pods on node", "node", klog.KObj(node.node), "allPods", len(node.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods))
if len(removablePods) == 0 { if len(removablePods) == 0 {
@@ -338,7 +340,7 @@ func evictPods(
"Pods", nodeInfo.usage[v1.ResourcePods].Value(), "Pods", nodeInfo.usage[v1.ResourcePods].Value(),
} }
for name := range totalAvailableUsage { for name := range totalAvailableUsage {
if !isBasicResource(name) { if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value()) keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
} }
} }
@@ -361,7 +363,7 @@ func sortNodesByUsage(nodes []NodeInfo, ascending bool) {
// extended resources // extended resources
for name := range nodes[i].usage { for name := range nodes[i].usage {
if !isBasicResource(name) { if !nodeutil.IsBasicResource(name) {
ti = ti + nodes[i].usage[name].Value() ti = ti + nodes[i].usage[name].Value()
tj = tj + nodes[j].usage[name].Value() tj = tj + nodes[j].usage[name].Value()
} }
@@ -411,44 +413,7 @@ func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName {
return resourceNames return resourceNames
} }
// isBasicResource checks if resource is basic native. func classifyPods(ctx context.Context, pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
func isBasicResource(name v1.ResourceName) bool {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
return true
default:
return false
}
}
func nodeUtilization(node *v1.Node, pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
}
for _, name := range resourceNames {
if !isBasicResource(name) {
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for _, name := range resourceNames {
quantity, ok := req[name]
if ok && name != v1.ResourcePods {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
return totalReqs
}
func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
var nonRemovablePods, removablePods []*v1.Pod var nonRemovablePods, removablePods []*v1.Pod
for _, pod := range pods { for _, pod := range pods {

View File

@@ -47,6 +47,7 @@ func TestPodAntiAffinity(t *testing.T) {
Unschedulable: true, Unschedulable: true,
} }
}) })
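// node4 is intentionally tiny (2m of CPU), so a pod evicted for inter-pod anti-affinity would not fit on it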
node4 := test.BuildTestNode("n4", 2, 2, 1, nil)
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil) p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
@@ -174,6 +175,14 @@ func TestPodAntiAffinity(t *testing.T) {
nodes: []*v1.Node{node1}, nodes: []*v1.Node{node1},
expectedEvictedPodCount: 0, expectedEvictedPodCount: 0,
}, },
{
description: "Won't evict pods because only other node doesn't have enough resources",
maxPodsToEvictPerNode: &uint3,
pods: []*v1.Pod{p1, p2, p3, p4},
nodes: []*v1.Node{node1, node4},
expectedEvictedPodCount: 0,
nodeFit: true,
},
} }
for _, test := range tests { for _, test := range tests {
@@ -209,6 +218,7 @@ func TestPodAntiAffinity(t *testing.T) {
test.maxPodsToEvictPerNode, test.maxPodsToEvictPerNode,
test.maxNoOfPodsToEvictPerNamespace, test.maxNoOfPodsToEvictPerNamespace,
test.nodes, test.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -298,6 +298,7 @@ func TestPodLifeTime(t *testing.T) {
nil, nil,
nil, nil,
tc.nodes, tc.nodes,
getPodsAssignedToNode,
false, false,
false, false,
tc.ignorePvcPods, tc.ignorePvcPods,

View File

@@ -97,8 +97,10 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
Unschedulable: true, Unschedulable: true,
} }
}) })
node4 := test.BuildTestNode("node4", 200, 3000, 10, nil)
node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil)
pods := initPods(node1) pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil))
createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32, nodeFit bool) api.DeschedulerStrategy { createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32, nodeFit bool) api.DeschedulerStrategy {
return api.DeschedulerStrategy{ return api.DeschedulerStrategy{
@@ -199,6 +201,20 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
expectedEvictedPodCount: 0, expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3, maxPodsToEvictPerNode: &uint3,
}, },
{
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node does not have enough CPU, 0 pod evictions",
strategy: createStrategy(true, true, 1, true),
nodes: []*v1.Node{node1, node4},
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
},
{
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node has enough CPU, 3 pod evictions",
strategy: createStrategy(true, true, 1, true),
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 3,
maxPodsToEvictPerNode: &uint3,
},
} }
for _, tc := range tests { for _, tc := range tests {
@@ -234,6 +250,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
tc.maxPodsToEvictPerNode, tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace, tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes, tc.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -18,20 +18,18 @@ package strategies
import ( import (
"context" "context"
"fmt"
"math" "math"
"sort" "sort"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions" "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation" "sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
"sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/pkg/utils"
@@ -170,7 +168,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint) klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
continue continue
} }
balanceDomains(podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodeMap) balanceDomains(ctx, client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodes)
} }
} }
@@ -225,15 +223,18 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
// [5, 5, 5, 5, 5, 5] // [5, 5, 5, 5, 5, 5]
// (assuming even distribution by the scheduler of the evicted pods) // (assuming even distribution by the scheduler of the evicted pods)
func balanceDomains( func balanceDomains(
ctx context.Context,
client clientset.Interface,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
podsForEviction map[*v1.Pod]struct{}, podsForEviction map[*v1.Pod]struct{},
constraint v1.TopologySpreadConstraint, constraint v1.TopologySpreadConstraint,
constraintTopologies map[topologyPair][]*v1.Pod, constraintTopologies map[topologyPair][]*v1.Pod,
sumPods float64, sumPods float64,
isEvictable func(*v1.Pod) bool, isEvictable func(pod *v1.Pod) bool,
nodeMap map[string]*v1.Node) { nodes []*v1.Node) {
idealAvg := sumPods / float64(len(constraintTopologies)) idealAvg := sumPods / float64(len(constraintTopologies))
sortedDomains := sortDomains(constraintTopologies, isEvictable) sortedDomains := sortDomains(ctx, constraintTopologies, isEvictable)
// i is the index for belowOrEqualAvg // i is the index for belowOrEqualAvg
// j is the index for aboveAvg // j is the index for aboveAvg
i := 0 i := 0
@@ -273,8 +274,8 @@ func balanceDomains(
// also (just for tracking), add them to the list of pods in the lower topology // also (just for tracking), add them to the list of pods in the lower topology
aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:] aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:]
for k := range aboveToEvict { for k := range aboveToEvict {
if err := validatePodFitsOnOtherNodes(aboveToEvict[k], nodeMap); err != nil { if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
klog.V(2).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(aboveToEvict[k])) klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
continue continue
} }
@@ -285,56 +286,6 @@ func balanceDomains(
} }
} }
// validatePodFitsOnOtherNodes performs validation based on scheduling predicates for affinity and toleration.
// It excludes the current node because, for the sake of domain balancing only, we care about if there is any other
// place it could theoretically fit.
// If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading
func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error {
// if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node,
// don't bother evicting it as it will just end up back on the same node
// however we still account for it "being evicted" so the algorithm can complete
// TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod
// but still try to get the required # of movePods (instead of just chopping that value off the slice above)
isRequiredDuringSchedulingIgnoredDuringExecution := pod.Spec.Affinity != nil &&
pod.Spec.Affinity.NodeAffinity != nil &&
pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil
hardTaintsFilter := func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
}
var eligibleNodesCount, ineligibleAffinityNodesCount, ineligibleTaintedNodesCount int
for _, node := range nodeMap {
if node == nodeMap[pod.Spec.NodeName] {
continue
}
if pod.Spec.NodeSelector != nil || isRequiredDuringSchedulingIgnoredDuringExecution {
if !nodeutil.PodFitsCurrentNode(pod, node) {
ineligibleAffinityNodesCount++
continue
}
}
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, hardTaintsFilter) {
ineligibleTaintedNodesCount++
continue
}
eligibleNodesCount++
}
if eligibleNodesCount == 0 {
var errs []error
if ineligibleAffinityNodesCount > 0 {
errs = append(errs, fmt.Errorf("%d nodes with ineligible selector/affinity", ineligibleAffinityNodesCount))
}
if ineligibleTaintedNodesCount > 0 {
errs = append(errs, fmt.Errorf("%d nodes with taints that are not tolerated", ineligibleTaintedNodesCount))
}
return utilerrors.NewAggregate(errs)
}
return nil
}
// sortDomains sorts and splits the list of topology domains based on their size // sortDomains sorts and splits the list of topology domains based on their size
// it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order: // it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order:
// 1. non-evictable pods // 1. non-evictable pods
@@ -342,7 +293,7 @@ func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error
// 3. pods in descending priority // 3. pods in descending priority
// 4. all other pods // 4. all other pods
// We then pop pods off the back of the list for eviction // We then pop pods off the back of the list for eviction
func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(*v1.Pod) bool) []topology { func sortDomains(ctx context.Context, constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(pod *v1.Pod) bool) []topology {
sortedTopologies := make([]topology, 0, len(constraintTopologyPairs)) sortedTopologies := make([]topology, 0, len(constraintTopologyPairs))
// sort the topologies and return 2 lists: those <= the average and those > the average (> list inverted) // sort the topologies and return 2 lists: those <= the average and those > the average (> list inverted)
for pair, list := range constraintTopologyPairs { for pair, list := range constraintTopologyPairs {

View File

@@ -483,6 +483,38 @@ func TestTopologySpreadConstraint(t *testing.T) {
}, },
namespaces: []string{"ns1"}, namespaces: []string{"ns1"},
}, },
{
name: "2 domains size [2 6], maxSkew=2, can't move any because node1 does not have enough CPU",
nodes: []*v1.Node{
test.BuildTestNode("n1", 200, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
},
pods: createTestPods([]testPodList{
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(2),
},
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
},
{
count: 6,
node: "n2",
labels: map[string]string{"foo": "bar"},
},
}),
expectedEvictedCount: 0,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: true,
},
},
namespaces: []string{"ns1"},
},
{ {
// see https://github.com/kubernetes-sigs/descheduler/issues/564 // see https://github.com/kubernetes-sigs/descheduler/issues/564
name: "Multiple constraints (6 nodes/2 zones, 4 pods)", name: "Multiple constraints (6 nodes/2 zones, 4 pods)",
@@ -686,7 +718,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
namespaces: []string{"ns1"}, namespaces: []string{"ns1"},
}, },
{ {
name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node", name: "2 domains, sizes [2,0], maxSkew=1, move 1 pods since pod does not tolerate the tainted node",
nodes: []*v1.Node{ nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
@@ -718,6 +750,43 @@ func TestTopologySpreadConstraint(t *testing.T) {
strategy: api.DeschedulerStrategy{}, strategy: api.DeschedulerStrategy{},
namespaces: []string{"ns1"}, namespaces: []string{"ns1"},
}, },
{
name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node, and NodeFit is enabled",
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
n.Labels["zone"] = "zoneB"
n.Spec.Taints = []v1.Taint{
{
Key: "taint-test",
Value: "test",
Effect: v1.TaintEffectNoSchedule,
},
}
}),
},
pods: createTestPods([]testPodList{
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
nodeSelector: map[string]string{"zone": "zoneA"},
},
}),
expectedEvictedCount: 0,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: true,
},
},
namespaces: []string{"ns1"},
},
{ {
name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint", name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint",
nodes: []*v1.Node{ nodes: []*v1.Node{
@@ -902,6 +971,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
nil, nil,
nil, nil,
tc.nodes, tc.nodes,
getPodsAssignedToNode,
false, false,
false, false,
false, false,

View File

@@ -6,25 +6,9 @@ import (
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
"k8s.io/klog/v2" "k8s.io/klog/v2"
) )
const (
// owner: @jinxu
// beta: v1.10
//
// New local storage types to support local storage capacity isolation
LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"
// owner: @egernst
// alpha: v1.16
//
// Enables PodOverhead, for accounting pod overheads which are specific to a given RuntimeClass
PodOverhead featuregate.Feature = "PodOverhead"
)
// GetResourceRequest finds and returns the request value for a specific resource. // GetResourceRequest finds and returns the request value for a specific resource.
func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 { func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
if resource == v1.ResourcePods { if resource == v1.ResourcePods {
@@ -53,11 +37,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
requestQuantity = resource.Quantity{Format: resource.DecimalSI} requestQuantity = resource.Quantity{Format: resource.DecimalSI}
} }
if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
return requestQuantity
}
for _, container := range pod.Spec.Containers { for _, container := range pod.Spec.Containers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok { if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
requestQuantity.Add(rQuantity) requestQuantity.Add(rQuantity)
@@ -72,9 +51,9 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
} }
} }
// if PodOverhead feature is supported, add overhead for running a pod // We assume pod overhead feature gate is enabled.
// to the total requests if the resource total is non-zero // We can't import the scheduler settings so we will inherit the default.
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) { if pod.Spec.Overhead != nil {
if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() { if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
requestQuantity.Add(podOverhead) requestQuantity.Add(podOverhead)
} }
@@ -162,9 +141,9 @@ func PodRequestsAndLimits(pod *v1.Pod) (reqs, limits v1.ResourceList) {
maxResourceList(limits, container.Resources.Limits) maxResourceList(limits, container.Resources.Limits)
} }
// if PodOverhead feature is supported, add overhead for running a pod // We assume pod overhead feature gate is enabled.
// to the sum of requests and to non-zero limits: // We can't import the scheduler settings so we will inherit the default.
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) { if pod.Spec.Overhead != nil {
addResourceList(reqs, pod.Spec.Overhead) addResourceList(reqs, pod.Spec.Overhead)
for name, quantity := range pod.Spec.Overhead { for name, quantity := range pod.Spec.Overhead {
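Since the feature-gate checks are gone, pod Overhead is now folded into the request totals whenever it is set on the spec. A small sketch of the resulting arithmetic, using the PodRequestsAndLimits helper changed above; the concrete quantities are made up for illustration.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "app",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
					},
				},
			}},
			// RuntimeClass overhead; with the gate check removed it is always
			// added on top of the container requests.
			Overhead: v1.ResourceList{
				v1.ResourceCPU: *resource.NewMilliQuantity(250, resource.DecimalSI),
			},
		},
	}
	reqs, _ := utils.PodRequestsAndLimits(pod)
	cpu := reqs[v1.ResourceCPU]
	fmt.Println(cpu.MilliValue()) // expected to report 750: 500m container request + 250m overhead
}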

View File

@@ -144,6 +144,7 @@ func TestRemoveDuplicates(t *testing.T) {
nil, nil,
nil, nil,
nodes, nodes,
getPodsAssignedToNode,
true, true,
false, false,
false, false,

View File

@@ -83,7 +83,7 @@ func TestFailedPods(t *testing.T) {
defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy})
waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed)
podEvictor := initPodEvictorOrFail(t, clientSet, nodes) podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
t.Logf("Running RemoveFailedPods strategy for %s", name) t.Logf("Running RemoveFailedPods strategy for %s", name)
strategies.RemoveFailedPods( strategies.RemoveFailedPods(

View File

@@ -39,7 +39,6 @@ import (
v1qos "k8s.io/kubectl/pkg/util/qos" v1qos "k8s.io/kubectl/pkg/util/qos"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options" "sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
deschedulerapi "sigs.k8s.io/descheduler/pkg/api" deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler" "sigs.k8s.io/descheduler/pkg/descheduler"
"sigs.k8s.io/descheduler/pkg/descheduler/client" "sigs.k8s.io/descheduler/pkg/descheduler/client"
@@ -199,6 +198,7 @@ func runPodLifetimeStrategy(
nil, nil,
nil, nil,
nodes, nodes,
getPodsAssignedToNode,
false, false,
evictCritical, evictCritical,
false, false,
@@ -324,7 +324,7 @@ func TestLowNodeUtilization(t *testing.T) {
waitForRCPodsRunning(ctx, t, clientSet, rc) waitForRCPodsRunning(ctx, t, clientSet, rc)
// Run LowNodeUtilization strategy // Run LowNodeUtilization strategy
podEvictor := initPodEvictorOrFail(t, clientSet, nodes) podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
podFilter, err := podutil.NewOptions().WithFilter(podEvictor.Evictable().IsEvictable).BuildFilterFunc() podFilter, err := podutil.NewOptions().WithFilter(podEvictor.Evictable().IsEvictable).BuildFilterFunc()
if err != nil { if err != nil {
@@ -886,17 +886,6 @@ func TestEvictAnnotation(t *testing.T) {
clientSet, nodeInformer, getPodsAssignedToNode, stopCh := initializeClient(t) clientSet, nodeInformer, getPodsAssignedToNode, stopCh := initializeClient(t)
defer close(stopCh) defer close(stopCh)
nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
t.Errorf("Error listing node with %v", err)
}
var nodes []*v1.Node
for i := range nodeList.Items {
node := nodeList.Items[i]
nodes = append(nodes, &node)
}
testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
t.Fatalf("Unable to create ns %v", testNamespace.Name) t.Fatalf("Unable to create ns %v", testNamespace.Name)
@@ -977,7 +966,7 @@ func TestDeschedulingInterval(t *testing.T) {
} }
s.Client = clientSet s.Client = clientSet
deschedulerPolicy := &api.DeschedulerPolicy{} deschedulerPolicy := &deschedulerapi.DeschedulerPolicy{}
c := make(chan bool, 1) c := make(chan bool, 1)
go func() { go func() {
@@ -1349,7 +1338,7 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) {
return allNodes, workerNodes return allNodes, workerNodes
} }
func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor { func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, nodes []*v1.Node) *evictions.PodEvictor {
evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet)
if err != nil || len(evictionPolicyGroupVersion) == 0 { if err != nil || len(evictionPolicyGroupVersion) == 0 {
t.Fatalf("Error creating eviction policy group: %v", err) t.Fatalf("Error creating eviction policy group: %v", err)
@@ -1361,6 +1350,7 @@ func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*
nil, nil,
nil, nil,
nodes, nodes,
getPodsAssignedToNode,
true, true,
false, false,
false, false,

View File

@@ -137,6 +137,7 @@ func TestTooManyRestarts(t *testing.T) {
nil, nil,
nil, nil,
nodes, nodes,
getPodsAssignedToNode,
true, true,
false, false,
false, false,

View File

@@ -77,7 +77,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
defer deleteRC(ctx, t, clientSet, violatorRc) defer deleteRC(ctx, t, clientSet, violatorRc)
waitForRCPodsRunning(ctx, t, clientSet, violatorRc) waitForRCPodsRunning(ctx, t, clientSet, violatorRc)
podEvictor := initPodEvictorOrFail(t, clientSet, nodes) podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
// Run TopologySpreadConstraint strategy // Run TopologySpreadConstraint strategy
t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name) t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name)