Merge pull request kubernetes#20140 from resouer/scheduler
Auto commit by PR queue bot
k8s-merge-robot committed Feb 6, 2016
2 parents 1eff1d5 + 233a601 commit 8e56494
Showing 3 changed files with 231 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/devel/scheduler_algorithm.md
@@ -67,6 +67,7 @@ Currently, Kubernetes scheduler provides some practical priority functions, incl
- `BalancedResourceAllocation`: This priority function tries to put the Pod on a node such that the CPU and Memory utilization rate is balanced after the Pod is deployed.
- `CalculateSpreadPriority`: Spread Pods by minimizing the number of Pods belonging to the same service on the same node. If zone information is present on the nodes, the priority will be adjusted so that pods are spread across zones and nodes.
- `CalculateAntiAffinityPriority`: Spread Pods by minimizing the number of Pods belonging to the same service on nodes with the same value for a particular label.
- `ImageLocalityPriority`: Nodes are prioritized based on the locality of the images requested by the pod. Nodes that already have a larger total size of the pod's requested images installed are preferred over nodes that have few or none of those images installed.

The details of the above priority functions can be found in [plugin/pkg/scheduler/algorithm/priorities](http://releases.k8s.io/HEAD/plugin/pkg/scheduler/algorithm/priorities/). Kubernetes uses some, but not all, of these priority functions by default. You can see which ones are used by default in [plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go](http://releases.k8s.io/HEAD/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go). As with predicates, you can combine the above priority functions and assign weight factors (positive numbers) to them as you like (see [scheduler.md](scheduler.md) for how to customize); the sketch below shows the idea.
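Conceptually, each priority function produces a 0-10 score per node, and the scheduler combines them as a weighted sum and favors the node with the highest total. A minimal sketch of that combination step, assuming hypothetical names (`scoredPriority`, `combine`) rather than the scheduler's actual types:

```go
package main

import "fmt"

// scoredPriority pairs a priority function's weight with its per-node scores.
// Illustrative only; the real wiring lives in plugin/pkg/scheduler.
type scoredPriority struct {
	weight int            // positive weight factor assigned in the policy
	scores map[string]int // node name -> score in [0, 10]
}

// combine sums weight*score for every node across all priorities.
func combine(priorities []scoredPriority) map[string]int {
	total := map[string]int{}
	for _, p := range priorities {
		for node, score := range p.scores {
			total[node] += p.weight * score
		}
	}
	return total
}

func main() {
	final := combine([]scoredPriority{
		{weight: 1, scores: map[string]int{"node-a": 7, "node-b": 4}},
		{weight: 2, scores: map[string]int{"node-a": 1, "node-b": 5}},
	})
	fmt.Println(final) // map[node-a:9 node-b:14] -> node-b wins
}
```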

72 changes: 72 additions & 0 deletions plugin/pkg/scheduler/algorithm/priorities/priorities.go
@@ -170,6 +170,78 @@ func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod *api.Pod, machines
return result, nil
}

// This is a reasonable size range for container images: roughly the 90th percentile of images on Docker Hub falls within it.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
)
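// Illustrative note (not in the original source): given these bounds, each of the
// ten score buckets used by calculateScoreFromSize below spans (1000-23)/10 ≈ 97.7MB.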

// ImageLocalityPriority is a priority function that favors nodes that already have the pod's requested container images.
// It detects whether the requested images are present on a node, then calculates a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, the node gets the lowest priority.
// - If some of the images are present, the larger their total size, the higher the node's priority.
func ImageLocalityPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
sumSizeMap := make(map[string]int64)

nodes, err := nodeLister.List()
if err != nil {
return nil, err
}

for _, container := range pod.Spec.Containers {
for _, node := range nodes.Items {
// Check if this container's image is present and get its size.
imageSize := checkContainerImageOnNode(node, container)
// Add this size to the total result of this node.
sumSizeMap[node.Name] += imageSize
}
}

result := []schedulerapi.HostPriority{}
// Scores are on a scale of 0-10, where 0 is the lowest priority and 10 the highest.
for nodeName, sumSize := range sumSizeMap {
result = append(result, schedulerapi.HostPriority{Host: nodeName,
Score: calculateScoreFromSize(sumSize)})
}
return result, nil
}

// checkContainerImageOnNode checks whether a container's image is present on a node and returns its size, or 0 if absent.
// The comparison is an exact string match against the node's repo tags, so "gcr.io/40" does not match "gcr.io/40:v1".
func checkContainerImageOnNode(node api.Node, container api.Container) int64 {
for _, image := range node.Status.Images {
for _, repoTag := range image.RepoTags {
if container.Image == repoTag {
// The image is present; return its size immediately.
return image.Size
}
}
}
return 0
}

// calculateScoreFromSize calculates the priority of a node. sumSize is the total size of the pod's requested images present on the node.
// 1. Split the image size range into 10 buckets.
// 2. Derive the priority of a given sumSize from the bucket it falls into.
func calculateScoreFromSize(sumSize int64) int {
var score int
switch {
case sumSize < minImgSize:
// A score of 0 means none of the images required by this pod are present on this
// node, or their total size is too small to be taken into further consideration.
score = 0
// If the existing images' total size is at or above the max, give the node the highest priority.
case sumSize >= maxImgSize:
score = 10
default:
score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
}
// Return which bucket the given size belongs to
return score
}
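// Worked examples for the bucket math above (illustrative, not part of the
// original source): with minImgSize = 23MB and maxImgSize = 1000MB,
//   sumSize = 10MB   -> 0  (below minImgSize)
//   sumSize = 23MB   -> 1  (int(10*0/977) + 1)
//   sumSize = 120MB  -> 1  (int(10*97/977) + 1 = 0 + 1)
//   sumSize = 500MB  -> 5  (int(10*477/977) + 1 = 4 + 1)
//   sumSize = 1000MB -> 10 (clamped at maxImgSize)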

// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
158 changes: 158 additions & 0 deletions plugin/pkg/scheduler/algorithm/priorities/priorities_test.go
@@ -736,3 +736,161 @@ func TestBalancedResourceAllocation(t *testing.T) {
}
}
}

func TestImageLocalityPriority(t *testing.T) {
test_40_250 := api.PodSpec{
Containers: []api.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/250",
},
},
}

test_40_140 := api.PodSpec{
Containers: []api.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
},
},
}

test_min_max := api.PodSpec{
Containers: []api.Container{
{
Image: "gcr.io/10",
},
{
Image: "gcr.io/2000",
},
},
}

node_40_140_2000 := api.NodeStatus{
Images: []api.ContainerImage{
{
RepoTags: []string{
"gcr.io/40",
"gcr.io/40:v1",
"gcr.io/40:v1",
},
Size: int64(40 * mb),
},
{
RepoTags: []string{
"gcr.io/140",
"gcr.io/140:v1",
},
Size: int64(140 * mb),
},
{
RepoTags: []string{
"gcr.io/2000",
},
Size: int64(2000 * mb),
},
},
}

node_250_10 := api.NodeStatus{
Images: []api.ContainerImage{
{
RepoTags: []string{
"gcr.io/250",
},
Size: int64(250 * mb),
},
{
RepoTags: []string{
"gcr.io/10",
"gcr.io/10:v1",
},
Size: int64(10 * mb),
},
},
}

tests := []struct {
pod *api.Pod
pods []*api.Pod
nodes []api.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
// Pod: gcr.io/40 gcr.io/250

// Node1
// Image: gcr.io/40 40MB
// Score: int((40M-23M)/97.7M) + 1 = 1

// Node2
// Image: gcr.io/250 250MB
// Score: int((250M-23M)/97.7M) + 1 = 3
pod: &api.Pod{Spec: test_40_250},
nodes: []api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{"machine1", 1}, {"machine2", 3}},
test: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140

// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Score: int((40M+140M-23M)/97.7M) + 1 = 2

// Node2
// Image: not present
// Score: 0
pod: &api.Pod{Spec: test_40_140},
nodes: []api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{"machine1", 2}, {"machine2", 0}},
test: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10

// Node1
// Image: gcr.io/2000 2000MB
// Score: 2000MB >= maxImgSize, so the score is capped at 10

// Node2
// Image: gcr.io/10 10MB
// Score: 10MB < minImgSize, so the score is 0
pod: &api.Pod{Spec: test_min_max},
nodes: []api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{"machine1", 10}, {"machine2", 0}},
test: "if exceed limit, use limit",
},
}

for _, test := range tests {
m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
list, err := ImageLocalityPriority(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}

sort.Sort(test.expectedList)
sort.Sort(list)

if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

func makeImageNode(node string, status api.NodeStatus) api.Node {
return api.Node{
ObjectMeta: api.ObjectMeta{Name: node},
Status: status,
}
}
