Merge pull request #68081 from silveryfu/image-locality-tests-new
Automatic merge from submit-queue (batch tested with PRs 63437, 68081). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

Enable ImageLocalityPriority by default with integration tests

**What this PR does / why we need it**:

This PR is a follow-up to [#63842](#63842). It moves the ImageLocalityPriority function into the default priority functions of the default algorithm provider and adds integration tests for the updated scheduling policy.

- Compared to [#64662](#64662), this PR does not provide an e2e test, due to concerns that a large image may add too much overhead to the testing infrastructure and pipeline. We should add e2e tests using sufficiently large image(s) in follow-up PRs.

- Compared to [#64662](#64662), this PR simplifies the code changes and keeps them under test/integration/scheduler/.

- The PR also contains a bug fix for [#65745](#65745), caught by the integration test, where the image states were not properly cloned into the scheduler's cachedNodeInfoMap (see the sketch after this list). We might split this fix into a separate PR.
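
A minimal sketch of that Clone bug, using simplified stand-in types rather than the real `NodeInfo`/`ImageStateSummary` structs: when the clone starts from an empty `imageStates` map, the scheduler's node snapshot loses all image information, so image-locality scoring has nothing to work with.

```go
package main

import "fmt"

// Simplified stand-ins for the scheduler cache types, for illustration only.
type imageStateSummary struct {
	Size int64 // total size of the image in bytes
}

type nodeInfo struct {
	imageStates map[string]*imageStateSummary
}

// buggyClone mirrors the old behaviour: it allocates a fresh, empty map,
// so the clone forgets which images the node already has.
func (n *nodeInfo) buggyClone() *nodeInfo {
	return &nodeInfo{imageStates: make(map[string]*imageStateSummary)}
}

// fixedClone mirrors the patched behaviour: the clone keeps the image states.
func (n *nodeInfo) fixedClone() *nodeInfo {
	return &nodeInfo{imageStates: n.imageStates}
}

func main() {
	orig := &nodeInfo{imageStates: map[string]*imageStateSummary{
		"fake-large-image:v1": {Size: 3000 * 1024 * 1024},
	}}
	fmt.Println(len(orig.buggyClone().imageStates)) // 0: image info lost, locality score collapses to zero
	fmt.Println(len(orig.fixedClone().imageStates)) // 1: image info survives the snapshot
}
```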

The integration test covers the following scenario: a pod requiring a large image (~3GB) is submitted to the cluster, and exactly one node in the cluster already has that image; the pod should get scheduled onto that node. We might also consider whether more scenarios are desired.
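
For intuition about why the pod lands on that node, here is a rough sketch of the image-locality scoring idea. The thresholds and maximum score below are illustrative assumptions, not the scheduler's actual constants: the score grows with the total size of requested images already present on a node, clamped between a lower and an upper bound.

```go
package main

import "fmt"

// Illustrative bounds only; the real priority function defines its own thresholds.
const (
	mb          int64 = 1024 * 1024
	minImgSize        = 23 * mb   // below this, image locality is ignored
	maxImgSize        = 1000 * mb // above this, the score saturates
	maxPriority       = 10
)

// imageLocalityScore maps the total size of requested images already present
// on a node to a score in [0, maxPriority].
func imageLocalityScore(sumBytes int64) int64 {
	if sumBytes < minImgSize {
		sumBytes = minImgSize
	}
	if sumBytes > maxImgSize {
		sumBytes = maxImgSize
	}
	return maxPriority * (sumBytes - minImgSize) / (maxImgSize - minImgSize)
}

func main() {
	fmt.Println(imageLocalityScore(0))         // 0: node has none of the requested images
	fmt.Println(imageLocalityScore(3000 * mb)) // 10: node already holds the ~3GB test image
}
```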

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #

**Special notes for your reviewer**:

Kindly ping @resouer and @bsalamat 

**Release note**:

```release-note
None
```
Kubernetes Submit Queue authored Sep 1, 2018
2 parents 147520f + 3e438d7 commit da25aaa
Showing 6 changed files with 121 additions and 10 deletions.
7 changes: 3 additions & 4 deletions pkg/scheduler/algorithmprovider/defaults/defaults.go
@@ -94,10 +94,6 @@ func init() {
// Register the priority function so that its available
// but do not include it as part of the default priorities
factory.RegisterPriorityFunction2("EqualPriority", core.EqualPriorityMap, nil, 1)
// ImageLocalityPriority prioritizes nodes based on locality of images requested by a pod. Nodes with larger size
// of already-installed packages required by the pod will be preferred over nodes with no already-installed
// packages required by the pod or a small total size of already-installed packages required by the pod.
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
factory.RegisterPriorityFunction2(
@@ -260,6 +256,9 @@ func defaultPriorities() sets.String {

// Prioritizes nodes that marked with taint which pod can tolerate.
factory.RegisterPriorityFunction2("TaintTolerationPriority", priorities.ComputeTaintTolerationPriorityMap, priorities.ComputeTaintTolerationPriorityReduce, 1),

// ImageLocalityPriority prioritizes nodes that have images requested by the pod present.
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1),
)
}

3 changes: 2 additions & 1 deletion pkg/scheduler/algorithmprovider/defaults/defaults_test.go
@@ -59,7 +59,8 @@ func TestDefaultPriorities(t *testing.T) {
"BalancedResourceAllocation",
"NodePreferAvoidPodsPriority",
"NodeAffinityPriority",
"TaintTolerationPriority")
"TaintTolerationPriority",
"ImageLocalityPriority")
if expected := defaultPriorities(); !result.Equal(expected) {
t.Errorf("expected %v got %v", expected, result)
}
2 changes: 1 addition & 1 deletion pkg/scheduler/cache/node_info.go
@@ -393,7 +393,7 @@ func (n *NodeInfo) Clone() *NodeInfo {
diskPressureCondition: n.diskPressureCondition,
pidPressureCondition: n.pidPressureCondition,
usedPorts: make(util.HostPortInfo),
imageStates: make(map[string]*ImageStateSummary),
imageStates: n.imageStates,
generation: n.generation,
}
if len(n.pods) > 0 {
62 changes: 62 additions & 0 deletions test/integration/scheduler/priorities_test.go
@@ -22,6 +22,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
testutils "k8s.io/kubernetes/test/utils"
"strings"
)

// This file tests the scheduler priority functions.
@@ -172,3 +173,64 @@ func TestPodAffinity(t *testing.T) {
}
t.Errorf("Pod %v got scheduled on an unexpected node: %v.", podName, pod.Spec.NodeName)
}

// TestImageLocality verifies that the scheduler's image locality priority function
// works correctly, i.e., the pod gets scheduled to the node where its container images are ready.
func TestImageLocality(t *testing.T) {
context := initTest(t, "image-locality")
defer cleanupTest(t, context)

// Add a few nodes.
_, err := createNodes(context.clientSet, "testnode", nil, 10)
if err != nil {
t.Fatalf("cannot create nodes: %v", err)
}

// We use a fake large image as the test image used by the pod, which has relatively large image size.
image := v1.ContainerImage{
Names: []string{
"fake-large-image:v1",
},
SizeBytes: 3000 * 1024 * 1024,
}

// Create a node with the large image
nodeWithLargeImage, err := createNodeWithImages(context.clientSet, "testnode-large-image", nil, []v1.ContainerImage{image})
if err != nil {
t.Fatalf("cannot create node with a large image: %v", err)
}

// Create a pod with containers each having the specified image.
podName := "pod-using-large-image"
pod, err := runPodWithContainers(context.clientSet, initPodWithContainers(context.clientSet, &podWithContainersConfig{
Name: podName,
Namespace: context.ns.Name,
Containers: makeContainersWithImages(image.Names),
}))
if err != nil {
t.Fatalf("error running pod with images: %v", err)
}
if pod.Spec.NodeName != nodeWithLargeImage.Name {
t.Errorf("pod %v got scheduled on an unexpected node: %v. Expected node: %v.", podName, pod.Spec.NodeName, nodeWithLargeImage.Name)
} else {
t.Logf("pod %v got successfully scheduled on node %v.", podName, pod.Spec.NodeName)
}
}

// makeContainersWithImages returns a list of v1.Container objects, one for each given image. Duplicates of an image are ignored,
// i.e., each image is used only once.
func makeContainersWithImages(images []string) []v1.Container {
var containers []v1.Container
usedImages := make(map[string]struct{})

for _, image := range images {
if _, ok := usedImages[image]; !ok {
containers = append(containers, v1.Container{
Name: strings.Replace(image, ":", "-", -1) + "-container",
Image: image,
})
usedImages[image] = struct{}{}
}
}
return containers
}
1 change: 1 addition & 0 deletions test/integration/scheduler/scheduler_test.go
@@ -154,6 +154,7 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) {
"NodePreferAvoidPodsPriority",
"SelectorSpreadPriority",
"TaintTolerationPriority",
"ImageLocalityPriority",
),
},
{
56 changes: 52 additions & 4 deletions test/integration/scheduler/util.go
@@ -325,24 +325,35 @@ func waitForNodeLabels(cs clientset.Interface, nodeName string, labels map[strin
return wait.Poll(time.Millisecond*100, wait.ForeverTestTimeout, nodeHasLabels(cs, nodeName, labels))
}

// createNode creates a node with the given resource list and
// returns a pointer and error status. If 'res' is nil, a predefined amount of
// initNode returns a node with the given resource list and images. If 'res' is nil, a predefined amount of
// resource will be used.
func createNode(cs clientset.Interface, name string, res *v1.ResourceList) (*v1.Node, error) {
func initNode(name string, res *v1.ResourceList, images []v1.ContainerImage) *v1.Node {
// if resource is nil, we use a default amount of resources for the node.
if res == nil {
res = &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
}
}

n := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: name},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: *res,
Images: images,
},
}
return cs.CoreV1().Nodes().Create(n)
return n
}

// createNode creates a node with the given resource list.
func createNode(cs clientset.Interface, name string, res *v1.ResourceList) (*v1.Node, error) {
return cs.CoreV1().Nodes().Create(initNode(name, res, nil))
}

// createNodeWithImages creates a node with the given resource list and images.
func createNodeWithImages(cs clientset.Interface, name string, res *v1.ResourceList, images []v1.ContainerImage) (*v1.Node, error) {
return cs.CoreV1().Nodes().Create(initNode(name, res, images))
}

// updateNodeStatus updates the status of node.
@@ -492,6 +503,43 @@ func runPausePod(cs clientset.Interface, pod *v1.Pod) (*v1.Pod, error) {
return pod, nil
}

type podWithContainersConfig struct {
Name string
Namespace string
Containers []v1.Container
}

// initPodWithContainers initializes a pod API object from the given config. This is used primarily for generating
// pods with containers each having a specific image.
func initPodWithContainers(cs clientset.Interface, conf *podWithContainersConfig) *v1.Pod {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: conf.Name,
Namespace: conf.Namespace,
},
Spec: v1.PodSpec{
Containers: conf.Containers,
},
}
return pod
}

// runPodWithContainers creates a pod with given config and containers and waits
// until it is scheduled. It returns its pointer and error status.
func runPodWithContainers(cs clientset.Interface, pod *v1.Pod) (*v1.Pod, error) {
pod, err := cs.CoreV1().Pods(pod.Namespace).Create(pod)
if err != nil {
return nil, fmt.Errorf("Error creating pod-with-containers: %v", err)
}
if err = waitForPodToSchedule(cs, pod); err != nil {
return pod, fmt.Errorf("Pod %v didn't schedule successfully. Error: %v", pod.Name, err)
}
if pod, err = cs.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}); err != nil {
return pod, fmt.Errorf("Error getting pod %v info: %v", pod.Name, err)
}
return pod, nil
}

// podDeleted returns true if a pod is not found in the given namespace.
func podDeleted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
