Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion docs/crio.conf.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ CRI-O reads its storage defaults from the containers-storage.conf(5) file locate
only happen when CRI-O has been upgraded

**internal_wipe**=true
**This option is currently DEPRECATED, and will be removed in the future.**
Whether CRI-O should wipe containers after a reboot and images after an upgrade when the server starts.
If set to false, one must run `crio wipe` to wipe the containers and images in these situations.
This option is deprecated, and will be removed in the future.

**clean_shutdown_file**="/var/lib/crio/clean.shutdown"
Location for CRI-O to lay down the clean shutdown file.
Expand Down Expand Up @@ -277,12 +277,44 @@ The "crio.runtime.runtimes" table defines a list of OCI compatible runtimes. Th
Whether this runtime handler prevents host devices from being passed to privileged containers.

**allowed_annotations**=[]
**This field is currently DEPRECATED. If you'd like to use allowed_annotations, please use a workload.**
A list of experimental annotations this runtime handler is allowed to process.
The currently recognized values are:
"io.kubernetes.cri-o.userns-mode" for configuring a user namespace for the pod.
"io.kubernetes.cri-o.Devices" for configuring devices for the pod.
"io.kubernetes.cri-o.ShmSize" for configuring the size of /dev/shm.
"io.kubernetes.cri-o.UnifiedCgroup.$CTR_NAME" for configuring the cgroup v2 unified block for a container.
"io.containers.trace-syscall" for tracing syscalls via the OCI seccomp BPF hook.

### CRIO.RUNTIME.WORKLOADS TABLE
The "crio.runtime.workloads" table defines a list of workloads - a way to customize the behavior of a pod and container.
A workload is chosen for a pod based on whether the workload's **activation_annotation** is an annotation on the pod.

**activation_annotation**=""
activation_annotation is the pod annotation that activates these workload settings.

**annotation_prefix**=""
annotation_prefix is the way a pod can override a specific resource for a container.
The full annotation must be of the form `$annotation_prefix.$resource/$ctrname = $value`.

**allowed_annotations**=[]
allowed_annotations is a slice of experimental annotations that this workload is allowed to process.
The currently recognized values are:
"io.kubernetes.cri-o.userns-mode" for configuring a user namespace for the pod.
"io.kubernetes.cri-o.Devices" for configuring devices for the pod.
"io.kubernetes.cri-o.ShmSize" for configuring the size of /dev/shm.
"io.kubernetes.cri-o.UnifiedCgroup.$CTR_NAME" for configuring the cgroup v2 unified block for a container.
"io.containers.trace-syscall" for tracing syscalls via the OCI seccomp BPF hook.

### CRIO.RUNTIME.WORKLOAD.RESOURCES TABLE
The resources table is a structure for overriding certain resources for pods using this workload.
This structure provides a default value, and can be overridden by using the AnnotationPrefix.

**cpushares**=""
Specifies the number of CPU shares this pod has access to.

**cpuset**=""
Specifies the cpuset this pod has access to.

## CRIO.IMAGE TABLE
The `crio.image` table contains settings pertaining to the management of OCI images.
Expand Down
19 changes: 5 additions & 14 deletions internal/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,23 +200,14 @@ func (r *Runtime) PrivilegedWithoutHostDevices(handler string) (bool, error) {
return rh.PrivilegedWithoutHostDevices, nil
}

// FilterDisallowedAnnotations filters annotations that are not specified in the allowed_annotations map
// for a given handler.
// This function returns an error if the runtime handler can't be found.
// The annotations map is mutated in-place.
func (r *Runtime) FilterDisallowedAnnotations(handler string, annotations map[string]string) error {
// AllowedAnnotations returns the allowed annotations for this runtime.
func (r *Runtime) AllowedAnnotations(handler string) ([]string, error) {
rh, err := r.getRuntimeHandler(handler)
if err != nil {
return err
}
for ann := range annotations {
for _, disallowed := range rh.DisallowedAnnotations {
if strings.HasPrefix(ann, disallowed) {
delete(annotations, disallowed)
}
}
return []string{}, err
}
return nil

return rh.AllowedAnnotations, nil
}

// RuntimeType returns the type of runtimeHandler
Expand Down
23 changes: 7 additions & 16 deletions internal/oci/oci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,32 +121,23 @@ var _ = t.Describe("Oci", func() {
Expect(err).To(BeNil())
Expect(runtimeType).To(Equal(config.RuntimeTypeVM))
})
Context("FilterDisallowedAnnotations", func() {
It("should succeed to filter disallowed annotation", func() {
Context("AllowedAnnotations", func() {
It("should succeed to return allowed annotation", func() {
// Given
testAnn := map[string]string{
annotations.DevicesAnnotation: "/dev",
annotations.IRQLoadBalancingAnnotation: "true",
}
Expect(runtimes[performanceRuntime].ValidateRuntimeAllowedAnnotations()).To(BeNil())

// When
err := sut.FilterDisallowedAnnotations(performanceRuntime, testAnn)
foundAnn, err := sut.AllowedAnnotations(performanceRuntime)

// Then
Expect(err).To(BeNil())
_, ok := testAnn[annotations.DevicesAnnotation]
Expect(ok).To(Equal(false))

_, ok = testAnn[annotations.IRQLoadBalancingAnnotation]
Expect(ok).To(Equal(true))
Expect(foundAnn).NotTo(ContainElement(annotations.DevicesAnnotation))
Expect(foundAnn).To(ContainElement(annotations.IRQLoadBalancingAnnotation))
})
It("should fail to filter disallowed annotation of unknown runtime", func() {
It("should fail to return allowed annotation of unknown runtime", func() {
// Given
testAnn := map[string]string{}

// When
err := sut.FilterDisallowedAnnotations("invalid", testAnn)
_, err := sut.AllowedAnnotations("invalid")

// Then
Expect(err).NotTo(BeNil())
Expand Down
65 changes: 54 additions & 11 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,10 @@ func (c *RuntimeConfig) Validate(systemContext *types.SystemContext, onExecution
return errors.Wrap(err, "runtime validation")
}

if err := c.ValidateAllowedAnnotations(); err != nil {
return errors.Wrap(err, "allowed annotations validation")
}

// Validate the system registries configuration
if _, err := sysregistriesv2.GetRegistries(systemContext); err != nil {
return errors.Wrap(err, "invalid registries")
Expand Down Expand Up @@ -1224,22 +1228,61 @@ func (r *RuntimeHandler) ValidateRuntimeConfigPath(name string) error {
}

func (r *RuntimeHandler) ValidateRuntimeAllowedAnnotations() error {
disallowedAnnotations := make(map[string]struct{})
disallowed, err := validateAllowedAndGenerateDisallowedAnnotations(r.AllowedAnnotations)
if err != nil {
return err
}
logrus.Debugf(
"Allowed annotations for runtime: %v", r.AllowedAnnotations,
)
r.DisallowedAnnotations = disallowed
return nil
}

func validateAllowedAndGenerateDisallowedAnnotations(allowed []string) (disallowed []string, _ error) {
disallowedMap := make(map[string]struct{})
for _, ann := range annotations.AllAllowedAnnotations {
disallowedAnnotations[ann] = struct{}{}
disallowedMap[ann] = struct{}{}
}
for _, allowed := range r.AllowedAnnotations {
if _, ok := disallowedAnnotations[allowed]; !ok {
return errors.Errorf("invalid allowed_annotation: %s", allowed)
for _, ann := range allowed {
if _, ok := disallowedMap[ann]; !ok {
return nil, errors.Errorf("invalid allowed_annotation: %s", ann)
}
delete(disallowedAnnotations, allowed)
delete(disallowedMap, ann)
}
for ann := range disallowedAnnotations {
r.DisallowedAnnotations = append(r.DisallowedAnnotations, ann)
disallowed = make([]string, 0, len(disallowedMap))
for ann := range disallowedMap {
disallowed = append(disallowed, ann)
}
return disallowed, nil
}

// In the interim between adding workload level allowed annotations
// and disabling runtime level allowed annotations, we need to do a separate
// validation step to ensure neither list is stepping on the other's toes.
// Instead of complicated logic, declare workload level allowed annotations to
// always overwrite runtime level ones.
func (c *RuntimeConfig) ValidateAllowedAnnotations() error {
var workloadHasAnnotation bool
for _, wl := range c.Workloads {
if len(wl.AllowedAnnotations) != 0 {
workloadHasAnnotation = true
}
}
if !workloadHasAnnotation {
for _, wl := range c.Workloads {
wl.AllowedAnnotations = []string{}
wl.DisallowedAnnotations = []string{}
}
logrus.Infof("Workload does not have an allowed annotation configured. Clearing allowed annotations from runtimes")
return nil
}
logrus.Infof("Workload has an allowed annotation configured. Clearing allowed annotations from runtimes")
for name, rh := range c.Runtimes {
logrus.Infof("Clearing allowed annotations from %s", name)
rh.AllowedAnnotations = []string{}
rh.DisallowedAnnotations = []string{}
}
logrus.Debugf(
"Allowed annotations for runtime: %v", r.AllowedAnnotations,
)
return nil
}

Expand Down
84 changes: 77 additions & 7 deletions pkg/config/workloads.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@ package config

import (
"encoding/json"
"strings"

"github.com/opencontainers/runtime-tools/generate"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
)

type Resources struct {
CPUShares uint64 `json:"cpushares,omitempty"`
CPUSet string `json:"cpuset,omitempty"`
}

type Workloads map[string]*WorkloadConfig

type WorkloadConfig struct {
Expand All @@ -21,6 +18,16 @@ type WorkloadConfig struct {
// AnnotationPrefix is the way a pod can override a specific resource for a container.
// The full annotation must be of the form $annotation_prefix.$resource/$ctrname = $value
AnnotationPrefix string `toml:"annotation_prefix"`
// AllowedAnnotations is a slice of experimental annotations that this workload is allowed to process.
// The currently recognized values are:
// "io.kubernetes.cri-o.userns-mode" for configuring a user namespace for the pod.
// "io.kubernetes.cri-o.Devices" for configuring devices for the pod.
// "io.kubernetes.cri-o.ShmSize" for configuring the size of /dev/shm.
// "io.kubernetes.cri-o.UnifiedCgroup.$CTR_NAME" for configuring the cgroup v2 unified block for a container.
// "io.containers.trace-syscall" for tracing syscalls via the OCI seccomp BPF hook.
AllowedAnnotations []string `toml:"allowed_annotations,omitempty"`
// DisallowedAnnotations is the slice of experimental annotations that are not allowed for this workload.
DisallowedAnnotations []string
// Resources are the names of the resources that can be overridden by annotation.
// The key of the map is the resource name. The following resources are supported:
// `cpushares`: configure cpu shares for a given container
Expand All @@ -32,6 +39,16 @@ type WorkloadConfig struct {
Resources *Resources `toml:"resources"`
}

// Resources is a structure for overriding certain resources for the pod.
// This resources structure provides a default value, and can be overridden
// by using the AnnotationPrefix.
type Resources struct {
// CPUShares specifies the number of CPU shares this pod has access to.
// The zero value is omitted when the struct is encoded as JSON.
CPUShares uint64 `json:"cpushares,omitempty"`
// CPUSet specifies the cpuset this pod has access to.
// An empty string is omitted when the struct is encoded as JSON.
CPUSet string `json:"cpuset,omitempty"`
}

func (w Workloads) Validate() error {
for workload, config := range w {
if err := config.Validate(workload); err != nil {
Expand All @@ -45,9 +62,53 @@ func (w *WorkloadConfig) Validate(workloadName string) error {
if w.ActivationAnnotation == "" {
return errors.Errorf("annotation shouldn't be empty for workload %q", workloadName)
}
if err := w.ValidateWorkloadAllowedAnnotations(); err != nil {
return err
}
return w.Resources.ValidateDefaults()
}

// ValidateWorkloadAllowedAnnotations checks the workload's AllowedAnnotations
// and, when they are all valid, stores the complementary list in
// DisallowedAnnotations. On a validation error the workload is left unchanged
// and the error is returned.
func (w *WorkloadConfig) ValidateWorkloadAllowedAnnotations() error {
	rejected, err := validateAllowedAndGenerateDisallowedAnnotations(w.AllowedAnnotations)
	if err == nil {
		logrus.Debugf("Allowed annotations for workload: %v", w.AllowedAnnotations)
		w.DisallowedAnnotations = rejected
	}
	return err
}

// AllowedAnnotations returns the allowed annotations of the workload selected
// by the annotations in toFind. When no workload is selected, an empty
// (non-nil) slice is returned.
func (w Workloads) AllowedAnnotations(toFind map[string]string) []string {
	if matched := w.workloadGivenActivationAnnotation(toFind); matched != nil {
		return matched.AllowedAnnotations
	}
	return []string{}
}

// FilterDisallowedAnnotations removes from toFilter every annotation that is
// not permitted by the allowed list.
// The set of disallowed annotations is derived from allowed; an error is
// returned if that derivation rejects an entry in allowed.
// A key in toFilter is removed when it starts with a disallowed annotation,
// so keys that extend a disallowed annotation with a suffix are removed too.
// The toFilter map is mutated in-place.
func (w Workloads) FilterDisallowedAnnotations(allowed []string, toFilter map[string]string) error {
	disallowed, err := validateAllowedAndGenerateDisallowedAnnotations(allowed)
	if err != nil {
		return err
	}
	logrus.Warnf("Allowed annotations are specified for workload %v", allowed)

	// Deleting entries from a map while ranging over it is safe in Go.
	for ann := range toFilter {
		for _, d := range disallowed {
			if strings.HasPrefix(ann, d) {
				// Remove the key that matched, not the prefix: the prefix
				// itself is usually not a key of toFilter, so deleting it
				// would leave the disallowed annotation in place.
				delete(toFilter, ann)
				break
			}
		}
	}
	return nil
}

func (w Workloads) MutateSpecGivenAnnotations(ctrName string, specgen *generate.Generator, sboxAnnotations map[string]string) error {
workload := w.workloadGivenActivationAnnotation(sboxAnnotations)
if workload == nil {
Expand All @@ -73,9 +134,9 @@ func (w Workloads) workloadGivenActivationAnnotation(sboxAnnotations map[string]
return nil
}

func resourcesFromAnnotation(prefix, ctrName string, annotations map[string]string, defaultResources *Resources) (*Resources, error) {
func resourcesFromAnnotation(prefix, ctrName string, allAnnotations map[string]string, defaultResources *Resources) (*Resources, error) {
annotationKey := prefix + "/" + ctrName
value, ok := annotations[annotationKey]
value, ok := allAnnotations[annotationKey]
if !ok {
return defaultResources, nil
}
Expand All @@ -84,6 +145,9 @@ func resourcesFromAnnotation(prefix, ctrName string, annotations map[string]stri
if err := json.Unmarshal([]byte(value), &resources); err != nil {
return nil, err
}
if resources == nil {
return nil, nil
}

if resources.CPUSet == "" {
resources.CPUSet = defaultResources.CPUSet
Expand All @@ -96,6 +160,9 @@ func resourcesFromAnnotation(prefix, ctrName string, annotations map[string]stri
}

func (r *Resources) ValidateDefaults() error {
if r == nil {
return nil
}
if r.CPUSet == "" {
return nil
}
Expand All @@ -104,6 +171,9 @@ func (r *Resources) ValidateDefaults() error {
}

func (r *Resources) MutateSpec(specgen *generate.Generator) {
if r == nil {
return
}
if r.CPUSet != "" {
specgen.SetLinuxResourcesCPUCpus(r.CPUSet)
}
Expand Down
2 changes: 1 addition & 1 deletion server/container_create_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ func (s *Server) createSandboxContainer(ctx context.Context, ctr ctrIface.Contai
// TODO: eventually, this should be in the container package, but it's going through a lot of churn
// and SpecAddAnnotations is already being passed too many arguments
// Filter early so any use of the annotations doesn't use the wrong values
if err := s.Runtime().FilterDisallowedAnnotations(sb.RuntimeHandler(), ctr.Config().Annotations); err != nil {
if err := s.FilterDisallowedAnnotations(sb.Annotations(), ctr.Config().Annotations, sb.RuntimeHandler()); err != nil {
return nil, err
}

Expand Down
5 changes: 3 additions & 2 deletions server/sandbox_run_linux.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build linux
// +build linux

package server
Expand Down Expand Up @@ -338,8 +339,8 @@ func (s *Server) runPodSandbox(ctx context.Context, req *types.RunPodSandboxRequ
return nil, err
}

if err := s.Runtime().FilterDisallowedAnnotations(runtimeHandler, sbox.Config().Annotations); err != nil {
return nil, errors.Wrap(err, "filter disallowed annotations")
if err := s.FilterDisallowedAnnotations(sbox.Config().Annotations, sbox.Config().Annotations, runtimeHandler); err != nil {
return nil, err
}

kubeAnnotations := sbox.Config().Annotations
Expand Down
Loading