diff --git a/internal/oci/oci.go b/internal/oci/oci.go index 00ca592ba29..e4500e0fc5a 100644 --- a/internal/oci/oci.go +++ b/internal/oci/oci.go @@ -231,6 +231,13 @@ func (r *Runtime) AllowShmSizeAnnotation(handler string) (bool, error) { return r.allowAnnotation(handler, annotations.ShmSizeAnnotation) } +// AllowOCISeccompBPFHookAnnotation searches through the AllowedAnnotations for +// the OCI seccomp BPF hook annotation, checking whether this runtime allows +// processing of "io.containers.trace-syscall". +func (r *Runtime) AllowOCISeccompBPFHookAnnotation(handler string) (bool, error) { + return r.allowAnnotation(handler, annotations.OCISeccompBPFHookAnnotation) +} + func (r *Runtime) allowAnnotation(handler, annotation string) (bool, error) { rh, err := r.getRuntimeHandler(handler) if err != nil { diff --git a/internal/oci/oci_test.go b/internal/oci/oci_test.go index 3df9721bc2a..25582d2cd16 100644 --- a/internal/oci/oci_test.go +++ b/internal/oci/oci_test.go @@ -55,6 +55,7 @@ var _ = t.Describe("Oci", func() { annotations.CPULoadBalancingAnnotation, annotations.IRQLoadBalancingAnnotation, annotations.CPUQuotaAnnotation, + annotations.OCISeccompBPFHookAnnotation, }, }, } @@ -141,6 +142,24 @@ var _ = t.Describe("Oci", func() { Expect(err).To(BeNil()) Expect(allowed).To(Equal(true)) }) + It("AllowOCISeccompBPFHookAnnotation should be true when set", func() { + // Given + // When + allowed, err := sut.AllowOCISeccompBPFHookAnnotation(performanceRuntime) + + // Then + Expect(err).To(BeNil()) + Expect(allowed).To(Equal(true)) + }) + It("AllowOCISeccompBPFHookAnnotation should be false when runtime invalid", func() { + // Given + // When + allowed, err := sut.AllowOCISeccompBPFHookAnnotation(invalidRuntime) + + // Then + Expect(err).NotTo(BeNil()) + Expect(allowed).To(Equal(false)) + }) }) t.Describe("ExecSyncError", func() { diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go index d4d96ac27d8..72f3dbc8fda 100644 --- 
a/pkg/annotations/annotations.go +++ b/pkg/annotations/annotations.go @@ -24,4 +24,7 @@ const ( // IRQLoadBalancingAnnotation indicates that IRQ load balancing should be disabled for CPUs used by the container IRQLoadBalancingAnnotation = "irq-load-balancing.crio.io" + + // OCISeccompBPFHookAnnotation is the annotation used by the OCI seccomp BPF hook for tracing container syscalls + OCISeccompBPFHookAnnotation = "io.containers.trace-syscall" ) diff --git a/pkg/config/config.go b/pkg/config/config.go index 6df4e6347db..5868c87f119 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -26,6 +26,7 @@ import ( "github.com/cri-o/cri-o/internal/config/nsmgr" "github.com/cri-o/cri-o/internal/config/seccomp" "github.com/cri-o/cri-o/internal/config/ulimits" + "github.com/cri-o/cri-o/pkg/annotations" "github.com/cri-o/cri-o/server/useragent" "github.com/cri-o/cri-o/utils" "github.com/cri-o/ocicni/pkg/ocicni" @@ -164,6 +165,7 @@ type RuntimeHandler struct { // "io.kubernetes.cri-o.Devices" for configuring devices for the pod. // "io.kubernetes.cri-o.ShmSize" for configuring the size of /dev/shm. // "io.kubernetes.cri-o.UnifiedCgroup.$CTR_NAME" for configuring the cgroup v2 unified block for a container. + // "io.containers.trace-syscall" for tracing syscalls via the OCI seccomp BPF hook. 
AllowedAnnotations []string `toml:"allowed_annotations,omitempty"` } @@ -594,6 +596,9 @@ func DefaultConfig() (*Config, error) { defaultRuntime: { RuntimeType: DefaultRuntimeType, RuntimeRoot: DefaultRuntimeRoot, + AllowedAnnotations: []string{ + annotations.OCISeccompBPFHookAnnotation, + }, }, }, ConmonEnv: []string{ @@ -1039,8 +1044,12 @@ func (r *RuntimeHandler) ValidateRuntimePath(name string) error { return fmt.Errorf("invalid runtime_path for runtime '%s': %q", name, err) } - logrus.Debugf("found valid runtime %q for runtime_path %q", - name, r.RuntimePath) + logrus.Debugf( + "Found valid runtime %q for runtime_path %q", name, r.RuntimePath, + ) + logrus.Debugf( + "Allowed annotations for runtime: %v", r.AllowedAnnotations, + ) return nil } diff --git a/pkg/config/template.go b/pkg/config/template.go index 272d3208da1..be677f2fe4d 100644 --- a/pkg/config/template.go +++ b/pkg/config/template.go @@ -304,6 +304,7 @@ default_runtime = "{{ .DefaultRuntime }}" # "io.kubernetes.cri-o.Devices" for configuring devices for the pod. # "io.kubernetes.cri-o.ShmSize" for configuring the size of /dev/shm. # "io.kubernetes.cri-o.UnifiedCgroup.$CTR_NAME" for configuring the cgroup v2 unified block for a container. +# "io.containers.trace-syscall" for tracing syscalls via the OCI seccomp BPF hook. 
{{ range $runtime_name, $runtime_handler := .Runtimes }} [crio.runtime.runtimes.{{ $runtime_name }}] diff --git a/pkg/container/container.go b/pkg/container/container.go index fc6ea7a32a2..ea88794fb83 100644 --- a/pkg/container/container.go +++ b/pkg/container/container.go @@ -15,6 +15,7 @@ import ( "github.com/cri-o/cri-o/internal/config/device" "github.com/cri-o/cri-o/internal/lib" "github.com/cri-o/cri-o/internal/lib/sandbox" + "github.com/cri-o/cri-o/internal/log" oci "github.com/cri-o/cri-o/internal/oci" "github.com/cri-o/cri-o/internal/storage" crioann "github.com/cri-o/cri-o/pkg/annotations" @@ -146,8 +147,35 @@ func (c *container) SpecAddAnnotations(sb *sandbox.Sandbox, containerVolumes []o // Preserve the sandbox annotations. OCI hooks may re-use the sandbox // annotation values to apply them to the container later on. + // The sandbox annotations are already filtered for the allowed + // annotations, there is no need to check it additionally here. for k, v := range sb.Annotations() { - c.spec.AddAnnotation(k, v) + if strings.HasPrefix(k, crioann.OCISeccompBPFHookAnnotation) { + // The OCI seccomp BPF hook + // (https://github.com/containers/oci-seccomp-bpf-hook) + // uses the annotation io.containers.trace-syscall as an indicator + // to attach a BPF module to the process. The recorded syscalls + // will then be stored in the output path file (annotation + // value prefixed with 'of:'). We add custom logic to be + // able to distinguish containers within pods in Kubernetes. If + // we suffix the container name within the annotation key like + // this: io.containers.trace-syscall/container + // Then we will rewrite the key to + // 'io.containers.trace-syscall' if the metadata name is equal + // to 'container'. This allows us to trace containers into + // distinguishable files. 
+ if strings.TrimPrefix(k, crioann.OCISeccompBPFHookAnnotation+"/") == c.config.Metadata.Name { + log.Debugf(c.ctx, + "Annotation key for container %q rewritten to %q (value is: %q)", + c.config.Metadata.Name, crioann.OCISeccompBPFHookAnnotation, v, + ) + c.config.Annotations[crioann.OCISeccompBPFHookAnnotation] = v + c.spec.AddAnnotation(crioann.OCISeccompBPFHookAnnotation, v) + } else { + // Annotation not suffixed with the container name + c.spec.AddAnnotation(k, v) + } + } } c.spec.AddAnnotation(annotations.Image, image) diff --git a/server/sandbox_run_linux.go b/server/sandbox_run_linux.go index 10a040c84ca..91ccbe5fdf1 100644 --- a/server/sandbox_run_linux.go +++ b/server/sandbox_run_linux.go @@ -342,6 +342,15 @@ func (s *Server) runPodSandbox(ctx context.Context, req *types.RunPodSandboxRequ return nil, err } + allowOCISeccompBPFHookAnnotation, err := s.Runtime().AllowOCISeccompBPFHookAnnotation(runtimeHandler) + if err != nil { + return nil, errors.Wrap(err, "check for allowed OCI seccomp BPF hook annotation") + } + // Remove the OCI seccomp BPF hook annotation if it is not allowed + if !allowOCISeccompBPFHookAnnotation { + delete(kubeAnnotations, ann.OCISeccompBPFHookAnnotation) + } + idMappingsOptions, err := s.configureSandboxIDMappings(usernsMode, sbox.Config().Linux.SecurityContext, runtimeHandler) if err != nil { return nil, err