Skip to content

Commit

Permalink
Changes to improve realtime support
Browse files Browse the repository at this point in the history
Signed-off-by: Jordi Gil <jgil@redhat.com>
  • Loading branch information
jordigilh committed Oct 1, 2021
1 parent 399d8f1 commit 1b7069c
Show file tree
Hide file tree
Showing 41 changed files with 749 additions and 94 deletions.
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ container_bundle(
"$(container_prefix)/$(image_prefix)alpine-ext-kernel-boot-demo:$(container_tag)": "//containerimages:alpine-ext-kernel-boot-demo-container",
# Customized container-disk images
"$(container_prefix)/$(image_prefix)fedora-with-test-tooling-container-disk:$(container_tag)": "//containerimages:fedora-with-test-tooling",
"$(container_prefix)/$(image_prefix)fedora-realtime-container-disk:$(container_tag)": "//containerimages:fedora-realtime",
# testing images
"$(container_prefix)/$(image_prefix)disks-images-provider:$(container_tag)": "//images/disks-images-provider:disks-images-provider-image",
"$(container_prefix)/$(image_prefix)nfs-server:$(container_tag)": "//images/nfs-server:nfs-server-image",
Expand Down
7 changes: 7 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,13 @@ container_pull(
repository = "kubevirt/alpine-ext-kernel-boot-demo",
)

# Pulls the kubevirt/fedora-realtime-container-disk image from quay.io and
# registers it as the external repository "fedora_realtime".
# The image is pinned by sha256 digest rather than by tag.
container_pull(
name = "fedora_realtime",
digest = "sha256:437f4e02986daf0058239f4a282d32304dcac629d5d1b4c75a74025f1ce22811",
registry = "quay.io",
repository = "kubevirt/fedora-realtime-container-disk",
)

load(
"@io_bazel_rules_docker//go:image.bzl",
_go_image_repos = "repositories",
Expand Down
14 changes: 14 additions & 0 deletions api/openapi-spec/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -9766,6 +9766,10 @@
"description": "NUMA allows specifying settings for the guest NUMA topology",
"$ref": "#/definitions/v1.NUMA"
},
"realtime": {
"description": "Realtime instructs the virt-launcher to tune the VMI for lower latency, optional for real time workloads",
"$ref": "#/definitions/v1.Realtime"
},
"sockets": {
"description": "Sockets specifies the number of sockets inside the vmi. Must be a value greater or equal 1.",
"type": "integer",
Expand Down Expand Up @@ -11832,6 +11836,16 @@
}
}
},
"v1.Realtime": {
"description": "Realtime holds the tuning knobs specific for realtime workloads.",
"type": "object",
"properties": {
"mask": {
"description": "Mask defines the vcpu mask expression that defines which vcpus are used for realtime. Format matches libvirt's expressions. Example: \"0-3,^1\",\"0,2,3\",\"2-3\"",
"type": "string"
}
}
},
"v1.ReloadableComponentConfiguration": {
"description": "ReloadableComponentConfiguration holds all generic k8s configuration options which can be reloaded by components without requiring a restart.",
"type": "object",
Expand Down
6 changes: 6 additions & 0 deletions automation/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ elif [[ $TARGET =~ sig-network ]]; then
elif [[ $TARGET =~ sig-storage ]]; then
export KUBEVIRT_PROVIDER=${TARGET/-sig-storage/}
export KUBEVIRT_STORAGE="rook-ceph-default"
elif [[ $TARGET =~ sig-compute-realtime ]]; then
export KUBEVIRT_PROVIDER=${TARGET/-sig-compute-realtime/}
export KUBEVIRT_HUGEPAGES_2M=512
export KUBEVIRT_REALTIME_SCHEDULER=true
elif [[ $TARGET =~ sig-compute ]]; then
export KUBEVIRT_PROVIDER=${TARGET/-sig-compute/}
elif [[ $TARGET =~ sig-operator ]]; then
Expand Down Expand Up @@ -353,6 +357,8 @@ if [[ -z ${KUBEVIRT_E2E_FOCUS} && -z ${KUBEVIRT_E2E_SKIP} ]]; then
export KUBEVIRT_E2E_FOCUS="\\[sig-storage\\]|\\[rook-ceph\\]"
elif [[ $TARGET =~ vgpu.* ]]; then
export KUBEVIRT_E2E_FOCUS=MediatedDevices
elif [[ $TARGET =~ sig-compute-realtime ]]; then
export KUBEVIRT_E2E_FOCUS="\\[sig-compute-realtime\\]"
elif [[ $TARGET =~ sig-compute ]]; then
export KUBEVIRT_E2E_FOCUS="\\[sig-compute\\]"
export KUBEVIRT_E2E_SKIP="GPU|MediatedDevices"
Expand Down
2 changes: 1 addition & 1 deletion cluster-up-sha.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
bcdadf6a02e52fb5e52ec15f39858b9ba13d0f14
0597c310bda39d6f4637f6c8d4270297305b26db
2 changes: 1 addition & 1 deletion cluster-up/hack/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ provider_prefix=${JOB_NAME:-${KUBEVIRT_PROVIDER}}${EXECUTOR_NUMBER}
job_prefix=${JOB_NAME:-kubevirt}${EXECUTOR_NUMBER}

mkdir -p $KUBEVIRTCI_CONFIG_PATH/$KUBEVIRT_PROVIDER
KUBEVIRTCI_TAG=2109251021-10bdcd5
KUBEVIRTCI_TAG=2109282157-4c66402
2 changes: 1 addition & 1 deletion cluster-up/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2109251021-10bdcd5
2109282157-4c66402
14 changes: 14 additions & 0 deletions containerimages/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,17 @@ container_image(
tars = [":alpine-image-tar"],
visibility = ["//visibility:public"],
)

# Publicly visible "fedora-realtime" image target. Both the declared
# architecture and the base image are chosen with select(): linux_arm64 gets
# the arm64 values, every other platform falls back to the amd64 defaults.
# NOTE(review): the arm64 branch uses @fedora_realtime_aarch64, but the only
# container_pull visible in this change declares "fedora_realtime" - confirm
# that the aarch64 repository is declared in WORKSPACE.
container_image(
name = "fedora-realtime",
architecture = select({
"@io_bazel_rules_go//go/platform:linux_arm64": "arm64",
"//conditions:default": "amd64",
}),
base = select({
"@io_bazel_rules_go//go/platform:linux_arm64": "@fedora_realtime_aarch64//image",
"//conditions:default": "@fedora_realtime//image",
}),
mode = "444",
visibility = ["//visibility:public"],
)
2 changes: 1 addition & 1 deletion hack/check-unassigned-tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

main() {
skip="SRIOV|GPU|\\[sig-operator\\]|\\[sig-network\\]|\\[sig-storage\\]|\\[sig-compute\\]|\\[sig-performance\\]"
skip="SRIOV|GPU|\\[sig-operator\\]|\\[sig-network\\]|\\[sig-storage\\]|\\[sig-compute\\]|\\[sig-performance\\]|\\[sig-compute-realtime\\]"
result=$(FUNC_TEST_ARGS="-dryRun -skip=${skip}" make functest)
total_tests=$(echo "${result}" | grep "Ran[[:space:]].*of" | awk '{print $2}')
if [ "${total_tests}" != "0" ]; then
Expand Down
2 changes: 1 addition & 1 deletion hack/config-default.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ cdi_namespace=cdi
image_pull_policy=${IMAGE_PULL_POLICY:-IfNotPresent}
verbosity=${VERBOSITY:-2}
package_name=${PACKAGE_NAME:-kubevirt-dev}
kubevirtci_git_hash="2109251021-10bdcd5"
kubevirtci_git_hash="2109282157-4c66402"
conn_check_ipv4_address=${CONN_CHECK_IPV4_ADDRESS:-""}
conn_check_ipv6_address=${CONN_CHECK_IPV6_ADDRESS:-""}
conn_check_dns=${CONN_CHECK_DNS:-""}
Expand Down
20 changes: 11 additions & 9 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@ import (
v1 "kubevirt.io/client-go/api/v1"
)

const ExtensionAPIServerAuthenticationConfigMap = "extension-apiserver-authentication"
const RequestHeaderClientCAFileKey = "requestheader-client-ca-file"
const VirtShareDir = "/var/run/kubevirt"
const VirtPrivateDir = "/var/run/kubevirt-private"
const VirtLibDir = "/var/lib/kubevirt"
const KubeletPodsDir = "/var/lib/kubelet/pods"
const HostRootMount = "/proc/1/root/"
const CPUManagerOS3Path = HostRootMount + "var/lib/origin/openshift.local.volumes/cpu_manager_state"
const CPUManagerPath = HostRootMount + "var/lib/kubelet/cpu_manager_state"
// Shared names and filesystem locations, declared as a single const group.
const (
// Name and key used to look up the request-header client CA.
ExtensionAPIServerAuthenticationConfigMap = "extension-apiserver-authentication"
RequestHeaderClientCAFileKey = "requestheader-client-ca-file"
// KubeVirt directories.
VirtShareDir = "/var/run/kubevirt"
VirtPrivateDir = "/var/run/kubevirt-private"
VirtLibDir = "/var/lib/kubevirt"
KubeletPodsDir = "/var/lib/kubelet/pods"
// HostRootMount is the prefix for the CPU manager state paths below.
// NOTE(review): presumably the host root filesystem as seen through PID 1 - confirm.
HostRootMount = "/proc/1/root/"
CPUManagerOS3Path = HostRootMount + "var/lib/origin/openshift.local.volumes/cpu_manager_state"
CPUManagerPath = HostRootMount + "var/lib/kubelet/cpu_manager_state"
)

const NonRootUID = 107
const NonRootUserString = "qemu"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ func ValidateVirtualMachineInstanceSpec(field *k8sfield.Path, spec *v1.VirtualMa
causes = append(causes, validateCPUIsolatorThread(field, spec)...)
causes = append(causes, validateCPUFeaturePolicies(field, spec)...)
causes = append(causes, validateStartStrategy(field, spec)...)
causes = append(causes, validateRealtime(field, spec)...)

maxNumberOfInterfacesExceeded := len(spec.Domain.Devices.Interfaces) > arrayLenMax
if maxNumberOfInterfacesExceeded {
Expand Down Expand Up @@ -996,7 +997,7 @@ func validateNUMA(field *k8sfield.Path, spec *v1.VirtualMachineInstanceSpec, con
Field: field.Child("domain", "cpu", "numa", "guestMappingPassthrough").String(),
})
}
if spec.Domain.CPU.DedicatedCPUPlacement == false {
if !spec.Domain.CPU.DedicatedCPUPlacement {
causes = append(causes, metav1.StatusCause{
Type: metav1.CauseTypeFieldValueInvalid,
Message: fmt.Sprintf("%s must be set to true when NUMA topology strategy is set in %s",
Expand Down Expand Up @@ -1371,6 +1372,42 @@ func validateHostNameNotConformingToDNSLabelRules(field *k8sfield.Path, spec *v1
return causes
}

// validateRealtime runs the realtime-specific checks on a VMI spec.
// When the spec has no CPU section, or the CPU section has no Realtime knob,
// nothing is validated and no causes are returned. Otherwise the causes from
// the CPU check and the memory/NUMA check are returned, in that order.
func validateRealtime(field *k8sfield.Path, spec *v1.VirtualMachineInstanceSpec) (causes []metav1.StatusCause) {
	cpu := spec.Domain.CPU
	if cpu == nil || cpu.Realtime == nil {
		// Realtime was not requested, so there is nothing to check.
		return causes
	}
	causes = append(causes, validateCPURealtime(field, spec)...)
	return append(causes, validateMemoryRealtime(field, spec)...)
}

// validateCPURealtime reports a FieldValueRequired cause when the realtime
// knob is used without dedicatedCpuPlacement set to true. The caller must
// ensure spec.Domain.CPU is non-nil, because it is dereferenced directly.
func validateCPURealtime(field *k8sfield.Path, spec *v1.VirtualMachineInstanceSpec) (causes []metav1.StatusCause) {
	if spec.Domain.CPU.DedicatedCPUPlacement {
		return causes
	}

	dedicatedField := field.Child("domain", "cpu", "dedicatedCpuPlacement").String()
	realtimeField := field.Child("domain", "cpu", "realtime").String()

	return []metav1.StatusCause{{
		Type:    metav1.CauseTypeFieldValueRequired,
		Message: fmt.Sprintf("%s must be set to true when %s is used", dedicatedField, realtimeField),
		Field:   dedicatedField,
	}}
}

// validateMemoryRealtime reports a FieldValueRequired cause when the realtime
// knob is used but numa.guestMappingPassthrough is missing, either because
// NUMA itself is nil or because its GuestMappingPassthrough is nil. The caller
// must ensure spec.Domain.CPU is non-nil, because it is dereferenced directly.
func validateMemoryRealtime(field *k8sfield.Path, spec *v1.VirtualMachineInstanceSpec) (causes []metav1.StatusCause) {
	numa := spec.Domain.CPU.NUMA
	if numa != nil && numa.GuestMappingPassthrough != nil {
		return causes
	}

	passthroughField := field.Child("domain", "cpu", "numa", "guestMappingPassthrough").String()
	realtimeField := field.Child("domain", "cpu", "realtime").String()

	return []metav1.StatusCause{{
		Type:    metav1.CauseTypeFieldValueRequired,
		Message: fmt.Sprintf("%s must be defined when %s is used", passthroughField, realtimeField),
		Field:   passthroughField,
	}}
}

func appendNewStatusCauseForHostNameNotConformingToDNSLabelRules(field *k8sfield.Path, causes []metav1.StatusCause, errors []string) []metav1.StatusCause {
return append(causes, metav1.StatusCause{
Type: metav1.CauseTypeFieldValueInvalid,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3467,6 +3467,36 @@ var _ = Describe("Validating VMICreate Admitter", func() {
Expect(len(causes)).To(Equal(1))
})
})

// Admission checks for VMIs that set the CPU realtime knob.
Context("with realtime", func() {
var vmi *v1.VirtualMachineInstance
// Every case starts from a minimal VMI with 4 cores and an empty Realtime
// struct, with the NUMA feature gate enabled.
BeforeEach(func() {
vmi = v1.NewMinimalVMI("testvmi")
vmi.Spec.Domain.CPU = &v1.CPU{Realtime: &v1.Realtime{}, Cores: 4}
enableFeatureGate(virtconfig.NUMAFeatureGate)
})
// DedicatedCPUPlacement is left unset here. Other validators may add causes
// of their own, so the check is "at least one" plus the expected cause.
It("should reject the realtime knob without DedicatedCPUPlacement", func() {
vmi.Spec.Domain.Memory = &v1.Memory{Hugepages: &v1.Hugepages{PageSize: "2Mi"}}
vmi.Spec.Domain.CPU.NUMA = &v1.NUMA{GuestMappingPassthrough: &v1.NUMAGuestMappingPassthrough{}}
causes := ValidateVirtualMachineInstanceSpec(k8sfield.NewPath("fake"), &vmi.Spec, config)
Expect(len(causes)).To(BeNumerically(">=", 1))
Expect(causes).To(ContainElement(metav1.StatusCause{Type: metav1.CauseTypeFieldValueRequired, Field: "fake.domain.cpu.dedicatedCpuPlacement", Message: "fake.domain.cpu.dedicatedCpuPlacement must be set to true when fake.domain.cpu.realtime is used"}))
})
// NUMA is present but has no GuestMappingPassthrough: exactly one cause is expected.
It("should reject the realtime knob when NUMA Guest Mapping Passthrough is not defined", func() {
vmi.Spec.Domain.CPU.DedicatedCPUPlacement = true
vmi.Spec.Domain.CPU.NUMA = &v1.NUMA{}
causes := ValidateVirtualMachineInstanceSpec(k8sfield.NewPath("fake"), &vmi.Spec, config)
Expect(causes).To(HaveLen(1))
Expect(causes).To(ContainElement(metav1.StatusCause{Type: metav1.CauseTypeFieldValueRequired, Field: "fake.domain.cpu.numa.guestMappingPassthrough", Message: "fake.domain.cpu.numa.guestMappingPassthrough must be defined when fake.domain.cpu.realtime is used"}))
})
// NUMA is nil: the same single guestMappingPassthrough cause is expected.
It("should reject the realtime knob when NUMA is nil", func() {
vmi.Spec.Domain.CPU.DedicatedCPUPlacement = true
vmi.Spec.Domain.CPU.NUMA = nil
causes := ValidateVirtualMachineInstanceSpec(k8sfield.NewPath("fake"), &vmi.Spec, config)
Expect(causes).To(HaveLen(1))
Expect(causes).To(ContainElement(metav1.StatusCause{Type: metav1.CauseTypeFieldValueRequired, Field: "fake.domain.cpu.numa.guestMappingPassthrough", Message: "fake.domain.cpu.numa.guestMappingPassthrough must be defined when fake.domain.cpu.realtime is used"}))
})
})
})

var _ = Describe("Function getNumberOfPodInterfaces()", func() {
Expand Down Expand Up @@ -3885,5 +3915,4 @@ var _ = Describe("Function getNumberOfPodInterfaces()", func() {
causes := webhooks.ValidateVirtualMachineInstanceHypervFeatureDependencies(path, &vmi.Spec)
Expect(len(causes)).To(Equal(0))
})

})
7 changes: 3 additions & 4 deletions pkg/virt-controller/services/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,7 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
gracePeriodKillAfter := gracePeriodSeconds + int64(15)

// Get memory overhead
memoryOverhead := getMemoryOverhead(vmi, t.clusterConfig.GetClusterCPUArch())
memoryOverhead := GetMemoryOverhead(vmi, t.clusterConfig.GetClusterCPUArch())

// Consider CPU and memory requests and limits for pod scheduling
resources := k8sv1.ResourceRequirements{}
Expand Down Expand Up @@ -1765,7 +1765,6 @@ func getRequiredCapabilities(vmi *v1.VirtualMachineInstance, config *virtconfig.
capabilities = append(capabilities, CAP_SYS_ADMIN)
capabilities = append(capabilities, getVirtiofsCapabilities()...)
}

return capabilities
}

Expand Down Expand Up @@ -1796,7 +1795,7 @@ func appendUniqueImagePullSecret(secrets []k8sv1.LocalObjectReference, newsecret
return append(secrets, newsecret)
}

// getMemoryOverhead computes the estimation of total
// GetMemoryOverhead computes the estimation of total
// memory needed for the domain to operate properly.
// This includes the memory needed for the guest and memory
// for Qemu and OS overhead.
Expand All @@ -1805,7 +1804,7 @@ func appendUniqueImagePullSecret(secrets []k8sv1.LocalObjectReference, newsecret
//
// Note: This is the best estimation we were able to come up with
// and is still not 100% accurate
func getMemoryOverhead(vmi *v1.VirtualMachineInstance, cpuArch string) *resource.Quantity {
func GetMemoryOverhead(vmi *v1.VirtualMachineInstance, cpuArch string) *resource.Quantity {
domain := vmi.Spec.Domain
vmiMemoryReq := domain.Resources.Requests.Memory()

Expand Down
37 changes: 35 additions & 2 deletions pkg/virt-controller/services/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2325,7 +2325,7 @@ var _ = Describe("Template", func() {
arch := config.GetClusterCPUArch()
Expect(err).ToNot(HaveOccurred())
expectedMemory := resource.NewScaledQuantity(0, resource.Kilo)
expectedMemory.Add(*getMemoryOverhead(vmi, arch))
expectedMemory.Add(*GetMemoryOverhead(vmi, arch))
expectedMemory.Add(*vmi.Spec.Domain.Resources.Requests.Memory())
Expect(pod.Spec.Containers[0].Resources.Requests.Memory().Value()).To(Equal(expectedMemory.Value()))
})
Expand Down Expand Up @@ -2353,7 +2353,7 @@ var _ = Describe("Template", func() {
arch := config.GetClusterCPUArch()
Expect(err).ToNot(HaveOccurred())
expectedMemory := resource.NewScaledQuantity(0, resource.Kilo)
expectedMemory.Add(*getMemoryOverhead(vmi1, arch))
expectedMemory.Add(*GetMemoryOverhead(vmi1, arch))
expectedMemory.Add(*vmi.Spec.Domain.Resources.Requests.Memory())
Expect(pod.Spec.Containers[0].Resources.Requests.Memory().Value()).To(Equal(expectedMemory.Value()))
Expect(pod1.Spec.Containers[0].Resources.Requests.Memory().Value()).To(Equal(expectedMemory.Value()))
Expand Down Expand Up @@ -3154,6 +3154,39 @@ var _ = Describe("Template", func() {
}
})

// Pod memory request for a VMI that sets the CPU realtime knob.
Context("With a realtime workload", func() {
It("should calculate the overhead memory including the requested memory", func() {
config, kvInformer, svc = configFactory(defaultArch)
vmi := newMinimalWithContainerDisk("testvmi")
// Requests and limits are identical: 1G of memory and 1 CPU.
vmi.Spec.Domain.Resources = v1.ResourceRequirements{
Requests: kubev1.ResourceList{
kubev1.ResourceMemory: resource.MustParse("1G"),
kubev1.ResourceCPU: resource.MustParse("1"),
},
Limits: kubev1.ResourceList{
kubev1.ResourceMemory: resource.MustParse("1G"),
kubev1.ResourceCPU: resource.MustParse("1"),
},
}
// Single-vCPU topology with dedicated placement, an isolated emulator
// thread, an empty NUMA struct and the realtime knob set.
vmi.Spec.Domain.CPU = &v1.CPU{
Cores: 1,
Sockets: 1,
Threads: 1,
DedicatedCPUPlacement: true,
NUMA: &v1.NUMA{},
IsolateEmulatorThread: true,
Realtime: &v1.Realtime{},
}

pod, err := svc.RenderLaunchManifest(vmi)
arch := config.GetClusterCPUArch()
Expect(err).ToNot(HaveOccurred())
// Expected request for the first container = computed overhead + the VMI's memory request.
expectedMemory := resource.NewScaledQuantity(0, resource.Kilo)
expectedMemory.Add(*GetMemoryOverhead(vmi, arch))
expectedMemory.Add(*vmi.Spec.Domain.Resources.Requests.Memory())
Expect(pod.Spec.Containers[0].Resources.Requests.Memory().Value()).To(Equal(expectedMemory.Value()))
})
})
})

Describe("ServiceAccountName", func() {
Expand Down
1 change: 1 addition & 0 deletions pkg/virt-handler/isolation/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ go_library(
deps = [
"//pkg/container-disk:go_default_library",
"//pkg/util:go_default_library",
"//pkg/virt-controller/services:go_default_library",
"//pkg/virt-handler/cgroup:go_default_library",
"//pkg/virt-handler/cmd-client:go_default_library",
"//pkg/virt-handler/virt-chroot:go_default_library",
Expand Down
Loading

0 comments on commit 1b7069c

Please sign in to comment.