diff --git a/go.mod b/go.mod index 324d6af1c35..3fd74533697 100644 --- a/go.mod +++ b/go.mod @@ -40,7 +40,7 @@ require ( github.com/onsi/gomega v1.13.0 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.0.2-0.20200206005212-79b036d80240 - github.com/opencontainers/runc v1.0.0-rc95 + github.com/opencontainers/runc v1.0.0-rc95.0.20210521141834-a95237f81684 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/opencontainers/runtime-tools v0.9.1-0.20200121211434-d1bf3e66ff0a github.com/opencontainers/selinux v1.8.2 diff --git a/go.sum b/go.sum index 9963c75facc..4d74c503199 100644 --- a/go.sum +++ b/go.sum @@ -1077,8 +1077,9 @@ github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runc v1.0.0-rc91/go.mod h1:3Sm6Dt7OT8z88EbdQqqcRN2oCT54jbi72tT/HqgflT8= github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0= -github.com/opencontainers/runc v1.0.0-rc95 h1:RMuWVfY3E1ILlVsC3RhIq38n4sJtlOFwU9gfFZSqrd0= github.com/opencontainers/runc v1.0.0-rc95/go.mod h1:z+bZxa/+Tz/FmYVWkhUajJdzFeOqjc5vrqskhVyHGUM= +github.com/opencontainers/runc v1.0.0-rc95.0.20210521141834-a95237f81684 h1:lxWmdjKd6ohpRh4G2ogFNS4EAyAbwWZnlOcfYfpu22s= +github.com/opencontainers/runc v1.0.0-rc95.0.20210521141834-a95237f81684/go.mod h1:z+bZxa/+Tz/FmYVWkhUajJdzFeOqjc5vrqskhVyHGUM= github.com/opencontainers/runtime-spec v1.0.3-0.20201121164853-7413a7f753e1 h1:UAfI7SOCo1CNIu3RevW9B4HQyf7SY5aSzcSeoC7OPs0= github.com/opencontainers/runtime-spec v1.0.3-0.20201121164853-7413a7f753e1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= diff --git a/internal/config/cgmgr/cgmgr.go b/internal/config/cgmgr/cgmgr.go index 3a0d8a9551d..220734d821e 100644 --- a/internal/config/cgmgr/cgmgr.go +++ b/internal/config/cgmgr/cgmgr.go @@ -58,11 +58,11 @@ type CgroupManager interface { // returns the cgroup parent, cgroup path, and error. For systemd cgroups, // it also checks there is enough memory in the given cgroup SandboxCgroupPath(string, string) (string, string, error) - // MoveConmonToCgroup takes the container ID, cgroup parent, conmon's cgroup (from the config) and conmon's PID - // It attempts to move conmon to the correct cgroup. + // MoveConmonToCgroup takes the container ID, cgroup parent, conmon's cgroup (from the config), conmon's PID, and some customized resources + // It attempts to move conmon to the correct cgroup, and set the resources for that cgroup. // It returns the cgroupfs parent that conmon was put into // so that CRI-O can clean the parent cgroup of the newly added conmon once the process terminates (systemd handles this for us) - MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int) (string, error) + MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int, resources *rspec.LinuxResources) (string, error) // CreateSandboxCgroup takes the sandbox parent, and sandbox ID. // It creates a new cgroup for that sandbox, which is useful when spoofing an infra container. 
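The MoveConmonToCgroup interface change above threads an *rspec.LinuxResources through to the cgroup managers so that conmon can inherit the workload's CPU pinning and shares. A minimal caller-side sketch follows; only the method signature and the runtime-spec types come from the diff, while the manager value, parent, shares and cpuset are hypothetical placeholders.

package cgmgrsketch

import (
	rspec "github.com/opencontainers/runtime-spec/specs-go"
)

// conmonCgroupManager mirrors just the method whose signature changed above.
type conmonCgroupManager interface {
	MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int, resources *rspec.LinuxResources) (string, error)
}

// moveConmonWithWorkloadCPU asks the manager to move conmon into its
// crio-conmon-<cid> cgroup while applying workload CPU settings to it.
func moveConmonWithWorkloadCPU(mgr conmonCgroupManager, cid, cgroupParent string, pid int) (string, error) {
	shares := uint64(200) // hypothetical workload default, matching the tests later in this diff
	res := &rspec.LinuxResources{
		CPU: &rspec.LinuxCPU{
			Shares: &shares,
			Cpus:   "0-1", // hypothetical cpuset
		},
	}
	// "pod" keeps conmon under the pod's cgroup parent; a nil resources
	// argument remains valid and skips the new resource handling.
	return mgr.MoveConmonToCgroup(cid, cgroupParent, "pod", pid, res)
}

Note that nil stays accepted: the cgroupfs manager below substitutes an empty LinuxResources, so existing callers keep working unchanged.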
CreateSandboxCgroup(sbParent, containerID string) error diff --git a/internal/config/cgmgr/cgmgr_test.go b/internal/config/cgmgr/cgmgr_test.go index 06e736a934a..6ccceea215f 100644 --- a/internal/config/cgmgr/cgmgr_test.go +++ b/internal/config/cgmgr/cgmgr_test.go @@ -160,7 +160,7 @@ var _ = t.Describe("Config", func() { // Given conmonCgroup := "notPodOrEmpty" // When - cgPath, err := sut.MoveConmonToCgroup("", "", conmonCgroup, 0) + cgPath, err := sut.MoveConmonToCgroup("", "", conmonCgroup, 0, nil) // Then Expect(cgPath).To(BeEmpty()) @@ -245,7 +245,7 @@ var _ = t.Describe("Config", func() { // Given conmonCgroup := "notPodOrEmpty" // When - cgPath, err := sut.MoveConmonToCgroup("", "", conmonCgroup, -1) + cgPath, err := sut.MoveConmonToCgroup("", "", conmonCgroup, -1, nil) // Then Expect(cgPath).To(BeEmpty()) diff --git a/internal/config/cgmgr/cgroupfs.go b/internal/config/cgmgr/cgroupfs.go index b7eb04c5dd1..0821d389342 100644 --- a/internal/config/cgmgr/cgroupfs.go +++ b/internal/config/cgmgr/cgroupfs.go @@ -9,6 +9,13 @@ import ( "strings" "github.com/containers/podman/v3/pkg/cgroups" + "github.com/containers/podman/v3/pkg/rootless" + "github.com/cri-o/cri-o/internal/config/node" + libctr "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + cgcfgs "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" rspec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -68,11 +75,15 @@ func (m *CgroupfsManager) SandboxCgroupPath(sbParent, sbID string) (cgParent, cg // It attempts to move conmon to the correct cgroup. // It returns the cgroupfs parent that conmon was put into // so that CRI-O can clean the cgroup path of the newly added conmon once the process terminates (systemd handles this for us) -func (*CgroupfsManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int) (string, error) { +func (*CgroupfsManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int, resources *rspec.LinuxResources) (cgroupPathToClean string, _ error) { if conmonCgroup != "pod" && conmonCgroup != "" { return "", errors.Errorf("conmon cgroup %s invalid for cgroupfs", conmonCgroup) } + if resources == nil { + resources = &rspec.LinuxResources{} + } + cgroupPath := fmt.Sprintf("%s/crio-conmon-%s", cgroupParent, cid) control, err := cgroups.New(cgroupPath, &rspec.LinuxResources{}) if err != nil { @@ -82,6 +93,10 @@ func (*CgroupfsManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup strin return cgroupPath, nil } + if err := setWorkloadSettings(cgroupPath, resources); err != nil { + return cgroupPath, err + } + // Record conmon's cgroup path in the container, so we can properly // clean it up when removing the container. 
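For the cgroupfs manager, conmon ends up in <cgroupParent>/crio-conmon-<cid>, and setWorkloadSettings (below) writes the CPU settings through per-controller v1 paths rooted at /sys/fs/cgroup. A small sketch of that path layout, assuming the standard mount point; the helper name is made up.

package cgmgrsketch

import (
	"fmt"
	"path/filepath"
)

// conmonCgroupfsPaths returns the conmon cgroup's relative path plus the
// per-controller absolute paths handed to the v1 fs manager above.
func conmonCgroupfsPaths(cgroupParent, cid string) (string, map[string]string) {
	cgPath := fmt.Sprintf("%s/crio-conmon-%s", cgroupParent, cid)
	controllers := []string{"cpuset", "cpu", "freezer", "devices"}
	paths := make(map[string]string, len(controllers))
	for _, ctrl := range controllers {
		paths[ctrl] = filepath.Join("/sys/fs/cgroup", ctrl, cgPath)
	}
	return cgPath, paths
}

With a parent of "pod_123-456" this yields paths like /sys/fs/cgroup/cpu/pod_123-456/crio-conmon-<cid>, which is exactly what the workloads.bats helper later reads back to verify cpuset.cpus and cpu.shares.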
// Here we should defer a crio-connmon- cgroup hierarchy deletion, but it will @@ -96,6 +111,51 @@ func (*CgroupfsManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup strin return cgroupPath, nil } +func setWorkloadSettings(cgPath string, resources *rspec.LinuxResources) error { + var mgr libctr.Manager + if resources.CPU == nil { + return nil + } + + paths := map[string]string{ + "cpuset": filepath.Join("/sys/fs/cgroup", "cpuset", cgPath), + "cpu": filepath.Join("/sys/fs/cgroup", "cpu", cgPath), + "freezer": filepath.Join("/sys/fs/cgroup", "freezer", cgPath), + "devices": filepath.Join("/sys/fs/cgroup", "devices", cgPath), + } + + cg := &cgcfgs.Cgroup{ + Name: cgPath, + Resources: &cgcfgs.Resources{}, + } + if resources.CPU.Cpus != "" { + cg.Resources.CpusetCpus = resources.CPU.Cpus + } + if resources.CPU.Shares != nil { + cg.Resources.CpuShares = *resources.CPU.Shares + } + + // We need to white list all devices + // so containers created underneath won't fail + cg.Resources.Devices = []*devices.Rule{ + { + Type: devices.WildcardDevice, + Allow: true, + }, + } + + if node.CgroupIsV2() { + var err error + mgr, err = fs2.NewManager(cg, cgPath, rootless.IsRootless()) + if err != nil { + return err + } + } else { + mgr = fs.NewManager(cg, paths, rootless.IsRootless()) + } + return mgr.Set(cg.Resources) +} + // CreateSandboxCgroup calls the helper function createSandboxCgroup for this manager. func (m *CgroupfsManager) CreateSandboxCgroup(sbParent, containerID string) error { return createSandboxCgroup(sbParent, containerID, m) diff --git a/internal/config/cgmgr/systemd.go b/internal/config/cgmgr/systemd.go index 82138a65079..32e7948ea28 100644 --- a/internal/config/cgmgr/systemd.go +++ b/internal/config/cgmgr/systemd.go @@ -9,9 +9,11 @@ import ( "strings" systemdDbus "github.com/coreos/go-systemd/v22/dbus" + "github.com/cri-o/cri-o/internal/config/node" "github.com/cri-o/cri-o/utils" "github.com/godbus/dbus/v5" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + rspec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -68,7 +70,7 @@ func (*SystemdManager) ContainerCgroupAbsolutePath(sbParent, containerID string) // cgroupPathToClean should always be returned empty. It is part of the interface to return the cgroup path // that cri-o is responsible for cleaning up upon the container's death. // Systemd takes care of this cleaning for us, so return an empty string -func (*SystemdManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int) (cgroupPathToClean string, _ error) { +func (*SystemdManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string, pid int, resources *rspec.LinuxResources) (cgroupPathToClean string, _ error) { if strings.HasSuffix(conmonCgroup, ".slice") { cgroupParent = conmonCgroup } @@ -77,12 +79,39 @@ func (*SystemdManager) MoveConmonToCgroup(cid, cgroupParent, conmonCgroup string // Set the systemd KillSignal to SIGPIPE that conmon ignores. // This helps during node shutdown so that conmon waits for the container // to exit and doesn't forward the SIGTERM that it gets. 
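setWorkloadSettings above switches between runc's v1 fs manager and v2 fs2 manager based on node.CgroupIsV2(), which is not shown in this diff. A hedged sketch of an equivalent probe (CRI-O caches the real result) checks the filesystem magic of /sys/fs/cgroup:

package cgmgrsketch

import (
	"golang.org/x/sys/unix"
)

// isCgroupV2Unified reports whether /sys/fs/cgroup is mounted as the unified
// (v2) hierarchy by checking the filesystem magic number.
func isCgroupV2Unified() (bool, error) {
	var st unix.Statfs_t
	if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
		return false, err
	}
	return st.Type == unix.CGROUP2_SUPER_MAGIC, nil
}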
- killSignalProp := systemdDbus.Property{ - Name: "KillSignal", - Value: dbus.MakeVariant(int(unix.SIGPIPE)), + props := []systemdDbus.Property{ + { + Name: "KillSignal", + Value: dbus.MakeVariant(int(unix.SIGPIPE)), + }, + systemdDbus.PropAfter("crio.service"), } + + if resources != nil && resources.CPU != nil { + if resources.CPU.Cpus != "" { + if !node.SystemdHasAllowedCPUs() { + logrus.Errorf("Systemd does not support AllowedCPUs; skipping setting for workload") + } else { + bits, err := systemd.RangeToBits(resources.CPU.Cpus) + if err != nil { + return "", errors.Wrapf(err, "cpuset conversion error") + } + props = append(props, systemdDbus.Property{ + Name: "AllowedCPUs", + Value: dbus.MakeVariant(bits), + }) + } + } + if resources.CPU.Shares != nil { + props = append(props, systemdDbus.Property{ + Name: "CPUShares", + Value: dbus.MakeVariant(resources.CPU.Shares), + }) + } + } + logrus.Debugf("Running conmon under slice %s and unitName %s", cgroupParent, conmonUnitName) - if err := utils.RunUnderSystemdScope(pid, cgroupParent, conmonUnitName, killSignalProp, systemdDbus.PropAfter("crio.service")); err != nil { + if err := utils.RunUnderSystemdScope(pid, cgroupParent, conmonUnitName, props...); err != nil { return "", errors.Wrapf(err, "failed to add conmon to systemd sandbox cgroup") } // return empty string as path because cgroup cleanup is done by systemd diff --git a/internal/config/node/node.go b/internal/config/node/node.go index 8185c7f2d1c..ff95ad1bab1 100644 --- a/internal/config/node/node.go +++ b/internal/config/node/node.go @@ -55,6 +55,13 @@ func ValidateConfig() error { activated: &systemdHasCollectMode, fatal: false, }, + { + name: "systemd AllowedCPUs", + init: SystemdHasAllowedCPUs, + err: &systemdHasAllowedCPUsErr, + activated: &systemdHasAllowedCPUs, + fatal: false, + }, { name: "fs.may_detach_mounts sysctl", init: checkFsMayDetachMounts, diff --git a/internal/config/node/systemd.go b/internal/config/node/systemd.go index ca491159dd9..666776e2780 100644 --- a/internal/config/node/systemd.go +++ b/internal/config/node/systemd.go @@ -13,17 +13,35 @@ var ( systemdHasCollectModeOnce sync.Once systemdHasCollectMode bool systemdHasCollectModeErr error + + systemdHasAllowedCPUsOnce sync.Once + systemdHasAllowedCPUs bool + systemdHasAllowedCPUsErr error ) func SystemdHasCollectMode() bool { systemdHasCollectModeOnce.Do(func() { - // This will show whether the currently running systemd supports CollectMode - _, err := exec.Command("systemctl", "show", "-p", "CollectMode", "systemd").Output() - if err != nil { - systemdHasCollectModeErr = errors.Wrapf(err, "check systemd CollectMode") - return - } - systemdHasCollectMode = true + systemdHasCollectMode, systemdHasCollectModeErr = systemdSupportsProperty("CollectMode") }) return systemdHasCollectMode } + +func SystemdHasAllowedCPUs() bool { + systemdHasAllowedCPUsOnce.Do(func() { + systemdHasAllowedCPUs, systemdHasAllowedCPUsErr = systemdSupportsProperty("AllowedCPUs") + }) + return systemdHasAllowedCPUs +} + +// systemdSupportsProperty checks whether systemd supports a property +// It returns an error if it does not. 
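For the systemd manager, the same resources are expressed as transient-scope properties rather than cgroupfs writes. A condensed sketch of the property list assembled above; KillSignal and After come straight from the diff, and AllowedCPUs is only attempted when the running systemd advertises it (which is what the SystemdHasAllowedCPUs probe, continued below, checks).

package cgmgrsketch

import (
	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
	"github.com/godbus/dbus/v5"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	rspec "github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/sys/unix"
)

// conmonScopeProperties builds the dbus properties for the crio-conmon-<cid>.scope unit.
func conmonScopeProperties(resources *rspec.LinuxResources) ([]systemdDbus.Property, error) {
	props := []systemdDbus.Property{
		// SIGPIPE is ignored by conmon, so systemd's stop signal does not kill it during node shutdown.
		{Name: "KillSignal", Value: dbus.MakeVariant(int(unix.SIGPIPE))},
		// Order the scope after crio.service so it is torn down before CRI-O on shutdown.
		systemdDbus.PropAfter("crio.service"),
	}
	if resources == nil || resources.CPU == nil {
		return props, nil
	}
	if resources.CPU.Cpus != "" {
		// AllowedCPUs takes a packed CPU mask, so the "0-1,3" style range is converted first.
		bits, err := systemd.RangeToBits(resources.CPU.Cpus)
		if err != nil {
			return nil, err
		}
		props = append(props, systemdDbus.Property{Name: "AllowedCPUs", Value: dbus.MakeVariant(bits)})
	}
	if resources.CPU.Shares != nil {
		props = append(props, systemdDbus.Property{Name: "CPUShares", Value: dbus.MakeVariant(*resources.CPU.Shares)})
	}
	return props, nil
}

When no shares are requested, the resulting scope reports CPUShares as "[not set]" (or 2^64-1 on older systemd), which is what the workloads.bats helper below accepts.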
+func systemdSupportsProperty(property string) (bool, error) { + output, err := exec.Command("systemctl", "show", "-p", property, "systemd").Output() + if err != nil { + return false, errors.Wrapf(err, "check systemd %s", property) + } + if len(output) == 0 { + return false, nil + } + return true, nil +} diff --git a/internal/oci/oci_linux.go b/internal/oci/oci_linux.go index 47f8c3e5117..c1727aa95a7 100644 --- a/internal/oci/oci_linux.go +++ b/internal/oci/oci_linux.go @@ -14,6 +14,9 @@ import ( "github.com/containers/podman/v3/pkg/cgroups" "github.com/cri-o/cri-o/internal/config/node" + "github.com/cri-o/cri-o/server/cri/types" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -23,8 +26,20 @@ func (r *runtimeOCI) createContainerPlatform(c *Container, cgroupParent string, if c.Spoofed() { return nil } + g := &generate.Generator{ + Config: &rspec.Spec{ + Linux: &rspec.Linux{ + Resources: &rspec.LinuxResources{}, + }, + }, + } + // Mutate our newly created spec to find the customizations that are needed for conmon + if err := r.config.Workloads.MutateSpecGivenAnnotations(types.InfraContainerName, g, c.Annotations()); err != nil { + return err + } + // Move conmon to specified cgroup - conmonCgroupfsPath, err := r.config.CgroupManager().MoveConmonToCgroup(c.id, cgroupParent, r.config.ConmonCgroup, pid) + conmonCgroupfsPath, err := r.config.CgroupManager().MoveConmonToCgroup(c.id, cgroupParent, r.config.ConmonCgroup, pid, g.Config.Linux.Resources) if err != nil { return err } diff --git a/internal/storage/runtime.go b/internal/storage/runtime.go index 83f950c1e78..5225d5cc4ff 100644 --- a/internal/storage/runtime.go +++ b/internal/storage/runtime.go @@ -383,6 +383,10 @@ func (r *runtimeService) DeleteContainer(idOrName string) error { return ErrInvalidContainerID } container, err := r.storageImageServer.GetStore().Container(idOrName) + // Already deleted + if errors.Is(err, storage.ErrContainerUnknown) { + return nil + } if err != nil { return err } diff --git a/server/cri/types/types.go b/server/cri/types/types.go index 01e8939860a..3eaa45b7290 100644 --- a/server/cri/types/types.go +++ b/server/cri/types/types.go @@ -22,6 +22,8 @@ const ( PodSandboxStateSandboxReady PodSandboxState = 0 PodSandboxStateSandboxNotReady PodSandboxState = 1 + + InfraContainerName = "POD" ) type VersionRequest struct { diff --git a/server/naming.go b/server/naming.go index 9185150c884..f413bda05bd 100644 --- a/server/naming.go +++ b/server/naming.go @@ -10,14 +10,13 @@ import ( const ( kubePrefix = "k8s" - infraName = "POD" nameDelimiter = "_" ) func makeSandboxContainerName(sandboxConfig *types.PodSandboxConfig) string { return strings.Join([]string{ kubePrefix, - infraName, + types.InfraContainerName, sandboxConfig.Metadata.Name, sandboxConfig.Metadata.Namespace, sandboxConfig.Metadata.UID, diff --git a/server/sandbox_run_linux.go b/server/sandbox_run_linux.go index f3e2315d3df..d4f17983263 100644 --- a/server/sandbox_run_linux.go +++ b/server/sandbox_run_linux.go @@ -38,7 +38,6 @@ import ( "golang.org/x/net/context" "golang.org/x/sys/unix" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/kubernetes/pkg/kubelet/leaky" kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types" ) @@ -281,7 +280,7 @@ func (s *Server) runPodSandbox(ctx context.Context, req *types.RunPodSandboxRequ pathsToChown := []string{} // we need to fill in the container name, as it is not present in the request. 
Luckily, it is a constant. - log.Infof(ctx, "Running pod sandbox: %s%s", translateLabelsToDescription(sbox.Config().Labels), leaky.PodInfraContainerName) + log.Infof(ctx, "Running pod sandbox: %s%s", translateLabelsToDescription(sbox.Config().Labels), types.InfraContainerName) kubeName := sbox.Config().Metadata.Name namespace := sbox.Config().Metadata.Namespace @@ -431,7 +430,7 @@ func (s *Server) runPodSandbox(ctx context.Context, req *types.RunPodSandboxRequ // Add special container name label for the infra container if labels != nil { - labels[kubeletTypes.KubernetesContainerNameLabel] = leaky.PodInfraContainerName + labels[kubeletTypes.KubernetesContainerNameLabel] = types.InfraContainerName } labelsJSON, err := json.Marshal(labels) if err != nil { diff --git a/server/server.go b/server/server.go index 48615d52fcc..7a5e88920cd 100644 --- a/server/server.go +++ b/server/server.go @@ -206,7 +206,7 @@ func (s *Server) restore(ctx context.Context) []string { log.Warnf(ctx, "Unable to delete container %s: %v", n, err) } // Release the infra container name and the pod name for future use - if strings.Contains(n, infraName) { + if strings.Contains(n, types.InfraContainerName) { s.ReleaseContainerName(n) } else { s.ReleasePodName(n) diff --git a/test/workloads.bats b/test/workloads.bats index 49848bfd7bf..e4c56d1dba6 100644 --- a/test/workloads.bats +++ b/test/workloads.bats @@ -8,6 +8,8 @@ function setup() { setup_test sboxconfig="$TESTDIR/sbox.json" ctrconfig="$TESTDIR/ctr.json" + systemd_supports_cpuset=$(systemctl show --property=AllowedCPUs systemd || true) + export systemd_supports_cpuset } function teardown() { @@ -47,6 +49,60 @@ function check_cpu_fields() { fi } +function check_conmon_fields() { + local ctr_id="$1" + local cpushares="$2" + local cpuset="$3" + + if [[ "$CONTAINER_CGROUP_MANAGER" == "cgroupfs" ]]; then + if is_cgroup_v2; then + cpuset_path="/sys/fs/cgroup" + cpushare_path="/sys/fs/cgroup" + cpushare_filename="cpu.weight" + # see https://github.com/containers/crun/blob/e5874864918f8f07acdff083f83a7a59da8abb72/crun.1.md#cpu-controller for conversion + cpushares=$((1 + ((cpushares - 2) * 9999) / 262142)) + else + cpuset_path="/sys/fs/cgroup/cpuset" + cpushare_path="/sys/fs/cgroup/cpu" + cpushare_filename="cpu.shares" + fi + + found_cpuset=$(cat "$cpuset_path/pod_123-456/crio-conmon-$ctr_id/cpuset.cpus") + echo "$found_cpuset" AND "$cpuset" + if [ -z "$cpuset" ]; then + [[ -z "$found_cpushares" ]] + else + [[ "$cpuset" == *"$found_cpuset"* ]] + fi + + echo "$found_cpushares" AND "$cpushares" + found_cpushares=$(cat "$cpushare_path/pod_123-456/crio-conmon-$ctr_id/$cpushare_filename") + if [ -z "$cpushares" ]; then + [[ -z "$found_cpushares" ]] + else + [[ "$cpushares" == *"$found_cpushares"* ]] + fi + else + # don't test cpuset if it's not supported by systemd + if [[ -n "$systemd_supports_cpuset" ]]; then + info="$(systemctl show --property=AllowedCPUs crio-conmon-"$ctr_id".scope)" + if [ -z "$cpuset" ]; then + echo "$info" | grep -E '^AllowedCPUs=$' + else + [[ "$info" == *"AllowedCPUs=$cpuset"* ]] + fi + fi + + info="$(systemctl show --property=CPUShares crio-conmon-"$ctr_id".scope)" + if [ -z "$cpushares" ]; then + # 18446744073709551615 is 2^64-1, which is the default systemd set in RHEL 7 + echo "$info" | grep -E '^CPUShares=\[not set\]$' || echo "$info" | grep 'CPUShares=18446744073709551615' + else + [[ "$info" == *"CPUShares=$cpushares"* ]] + fi + fi +} + @test "test workload gets configured to defaults" { shares="200" set="0-1" @@ -69,7 +125,7 @@ function 
check_cpu_fields() { shares="200" set="0-1" name=helloctr - create_workload "$shares" "0-2" + create_workload "$shares" "0" start_crio @@ -88,7 +144,7 @@ function check_cpu_fields() { check_cpu_fields "$ctr_id" "$shares" "$set" } -@test "test workload should not set if not defaulted or specified" { +@test "test workload should not be set if not defaulted or specified" { shares="200" set="" name=helloctr @@ -111,7 +167,7 @@ function check_cpu_fields() { check_cpu_fields "$ctr_id" "$shares" "$set" } -@test "test workload should not set if annotation not specified" { +@test "test workload should not be set if annotation not specified" { shares="" set="" name=helloctr @@ -132,3 +188,89 @@ function check_cpu_fields() { ctr_id=$(crictl run "$ctrconfig" "$sboxconfig") check_cpu_fields "$ctr_id" "$shares" "$set" } + +@test "test workload pod gets configured to defaults" { + shares="200" + set="0-1" + create_workload "$shares" "$set" + + start_crio + + jq --arg act "$activation" ' .annotations[$act] = "true"' \ + "$TESTDATA"/sandbox_config.json > "$sboxconfig" + + jq --arg act "$activation" ' .annotations[$act] = "true"' \ + "$TESTDATA"/container_sleep.json > "$ctrconfig" + + ctr_id=$(crictl run "$ctrconfig" "$sboxconfig") + + check_conmon_fields "$ctr_id" "$shares" "$set" +} + +@test "test workload can override pod defaults" { + shares="200" + set="0-1" + name=POD + create_workload "$shares" "0" + + start_crio + + jq --arg act "$activation" --arg set "{\"cpuset\": \"$set\"}" --arg setkey "$prefix/$name" \ + ' .annotations[$act] = "true" + | .annotations[$setkey] = $set' \ + "$TESTDATA"/sandbox_config.json > "$sboxconfig" + + jq --arg act "$activation" --arg name "$name" --arg set "{\"cpuset\": \"$set\"}" --arg setkey "$prefix/$name" \ + ' .annotations[$act] = "true" + | .annotations[$setkey] = $set + | .metadata.name = $name' \ + "$TESTDATA"/container_sleep.json > "$ctrconfig" + + ctr_id=$(crictl run "$ctrconfig" "$sboxconfig") + check_conmon_fields "$ctr_id" "$shares" "$set" +} + +@test "test workload pod should not be set if not defaulted or specified" { + shares="200" + set="" + name=POD + create_workload "$shares" "" + + start_crio + + jq --arg act "$activation" --arg set "{\"cpuset\": \"$set\"}" --arg setkey "$prefix/$name" \ + ' .annotations[$act] = "true" + | .annotations[$setkey] = $set' \ + "$TESTDATA"/sandbox_config.json > "$sboxconfig" + + jq --arg act "$activation" --arg name "$name" --arg set "{\"cpuset\": \"$set\"}" --arg setkey "$prefix/$name" \ + ' .annotations[$act] = "true" + | .annotations[$setkey] = $set + | .metadata.name = $name' \ + "$TESTDATA"/container_sleep.json > "$ctrconfig" + + ctr_id=$(crictl run "$ctrconfig" "$sboxconfig") + check_conmon_fields "$ctr_id" "$shares" "$set" +} + +@test "test workload pod should not be set if annotation not specified" { + shares="" + set="" + name=POD + create_workload "200" "0-1" + + start_crio + + jq --arg act "$activation" --arg set "{\"cpuset\": \"$set\"}" --arg setkey "$prefix/$name" \ + ' .annotations[$setkey] = $set' \ + "$TESTDATA"/sandbox_config.json > "$sboxconfig" + + jq --arg act "$activation" --arg name "$name" --arg set "{\"cpuset\": \"$set\"}" --arg setkey "$prefix/$name" \ + ' .annotations[$setkey] = $set + | .metadata.name = $name + | del(.linux.resources.cpu_shares)' \ + "$TESTDATA"/container_sleep.json > "$ctrconfig" + + ctr_id=$(crictl run "$ctrconfig" "$sboxconfig") + check_conmon_fields "$ctr_id" "$shares" "$set" +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go 
b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go index 441531fd77d..6afd17851ad 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go @@ -3,9 +3,12 @@ package fs2 import ( + "bufio" stdErrors "errors" + "fmt" "os" "strings" + "time" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" @@ -14,16 +17,6 @@ import ( ) func setFreezer(dirPath string, state configs.FreezerState) error { - if err := supportsFreezer(dirPath); err != nil { - // We can ignore this request as long as the user didn't ask us to - // freeze the container (since without the freezer cgroup, that's a - // no-op). - if state == configs.Undefined || state == configs.Thawed { - return nil - } - return errors.Wrap(err, "freezer not supported") - } - var stateStr string switch state { case configs.Undefined: @@ -36,11 +29,23 @@ func setFreezer(dirPath string, state configs.FreezerState) error { return errors.Errorf("invalid freezer state %q requested", state) } - if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil { + fd, err := fscommon.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR) + if err != nil { + // We can ignore this request as long as the user didn't ask us to + // freeze the container (since without the freezer cgroup, that's a + // no-op). + if state != configs.Frozen { + return nil + } + return errors.Wrap(err, "freezer not supported") + } + defer fd.Close() + + if _, err := fd.WriteString(stateStr); err != nil { return err } // Confirm that the cgroup did actually change states. - if actualState, err := getFreezer(dirPath); err != nil { + if actualState, err := readFreezer(dirPath, fd); err != nil { return err } else if actualState != state { return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState) @@ -48,13 +53,8 @@ func setFreezer(dirPath string, state configs.FreezerState) error { return nil } -func supportsFreezer(dirPath string) error { - _, err := fscommon.ReadFile(dirPath, "cgroup.freeze") - return err -} - func getFreezer(dirPath string) (configs.FreezerState, error) { - state, err := fscommon.ReadFile(dirPath, "cgroup.freeze") + fd, err := fscommon.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY) if err != nil { // If the kernel is too old, then we just treat the freezer as being in // an "undefined" state. @@ -63,12 +63,67 @@ func getFreezer(dirPath string) (configs.FreezerState, error) { } return configs.Undefined, err } - switch strings.TrimSpace(state) { - case "0": + defer fd.Close() + + return readFreezer(dirPath, fd) +} + +func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) { + if _, err := fd.Seek(0, 0); err != nil { + return configs.Undefined, err + } + state := make([]byte, 2) + if _, err := fd.Read(state); err != nil { + return configs.Undefined, err + } + switch string(state) { + case "0\n": return configs.Thawed, nil - case "1": - return configs.Frozen, nil + case "1\n": + return waitFrozen(dirPath) default: return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state) } } + +// waitFrozen polls cgroup.events until it sees "frozen 1" in it. 
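The vendored freezer change above no longer probes for cgroup.freeze with a separate read; it opens the file once, writes the requested state, and rewinds the same descriptor for the confirmation read. A minimal stdlib sketch of that rewind-and-re-read pattern (the helper name is made up):

package cgmgrsketch

import (
	"io"
	"os"
	"strings"
)

// writeThenReadBack writes value to an already-open file, then reads the whole
// content back through the same descriptor instead of reopening the path.
func writeThenReadBack(fd *os.File, value string) (string, error) {
	if _, err := fd.WriteString(value); err != nil {
		return "", err
	}
	if _, err := fd.Seek(0, io.SeekStart); err != nil {
		return "", err
	}
	buf, err := io.ReadAll(fd)
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(buf)), nil
}

For cgroup.freeze specifically, the value read back is "0\n" or "1\n", which is why readFreezer above compares two-byte strings.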
+func waitFrozen(dirPath string) (configs.FreezerState, error) { + fd, err := fscommon.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY) + if err != nil { + return configs.Undefined, err + } + defer fd.Close() + + // XXX: Simple wait/read/retry is used here. An implementation + // based on poll(2) or inotify(7) is possible, but it makes the code + // much more complicated. Maybe address this later. + const ( + // Perform maxIter with waitTime in between iterations. + waitTime = 10 * time.Millisecond + maxIter = 1000 + ) + scanner := bufio.NewScanner(fd) + for i := 0; scanner.Scan(); { + if i == maxIter { + return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter) + } + line := scanner.Text() + val := strings.TrimPrefix(line, "frozen ") + if val != line { // got prefix + if val[0] == '1' { + return configs.Frozen, nil + } + + i++ + // wait, then re-read + time.Sleep(waitTime) + _, err := fd.Seek(0, 0) + if err != nil { + return configs.Undefined, err + } + } + } + // Should only reach here either on read error, + // or if the file does not contain "frozen " line. + return configs.Undefined, scanner.Err() +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go index 91c314e09ea..de69617ee44 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go @@ -476,7 +476,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st } if cpus != "" { - bits, err := rangeToBits(cpus) + bits, err := RangeToBits(cpus) if err != nil { return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w", cpus, err) @@ -485,7 +485,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st newProp("AllowedCPUs", bits)) } if mems != "" { - bits, err := rangeToBits(mems) + bits, err := RangeToBits(mems) if err != nil { return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w", mems, err) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go index 07098218883..264f4c89353 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go @@ -9,11 +9,11 @@ import ( "github.com/willf/bitset" ) -// rangeToBits converts a text representation of a CPU mask (as written to +// RangeToBits converts a text representation of a CPU mask (as written to // or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes // with the corresponding bits set (as consumed by systemd over dbus as // AllowedCPUs/AllowedMemoryNodes unit property value). 
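waitFrozen above polls cgroup.events, which on cgroup v2 is a flat "key value" file (typically one "populated" line and one "frozen" line). A short sketch of reading it into a map, with a hypothetical helper name; the real code only looks for the "frozen " prefix and retries up to 1000 times with a 10 ms sleep.

package cgmgrsketch

import (
	"bufio"
	"os"
	"path/filepath"
	"strings"
)

// readCgroupEvents parses a cgroup.events file into key/value pairs.
func readCgroupEvents(dirPath string) (map[string]string, error) {
	f, err := os.Open(filepath.Join(dirPath, "cgroup.events"))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	events := map[string]string{}
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) == 2 {
			events[fields[0]] = fields[1]
		}
	}
	return events, scanner.Err()
}

events["frozen"] == "1" is the condition the retry loop in waitFrozen is waiting for.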
-func rangeToBits(str string) ([]byte, error) { +func RangeToBits(str string) ([]byte, error) { bits := &bitset.BitSet{} for _, r := range strings.Split(str, ",") { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go index 8abb0feb748..a7a2264c78c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go @@ -96,7 +96,7 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props newProp("CPUWeight", num)) case "cpuset.cpus", "cpuset.mems": - bits, err := rangeToBits(v) + bits, err := RangeToBits(v) if err != nil { return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err) } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/leaky/leaky.go b/vendor/k8s.io/kubernetes/pkg/kubelet/leaky/leaky.go deleted file mode 100644 index 7c75002c47e..00000000000 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/leaky/leaky.go +++ /dev/null @@ -1,25 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Package leaky holds bits of kubelet that should be internal but have leaked -// out through bad abstractions. TODO: delete all of this. -package leaky - -const ( - // PodInfraContainerName is used in a few places outside of Kubelet, such as indexing - // into the container info. - PodInfraContainerName = "POD" -) diff --git a/vendor/modules.txt b/vendor/modules.txt index eedbb8c4ec0..e7dc0e921f7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -765,7 +765,7 @@ github.com/opencontainers/go-digest ## explicit github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.0.0-rc95 +# github.com/opencontainers/runc v1.0.0-rc95.0.20210521141834-a95237f81684 ## explicit github.com/opencontainers/runc/libcontainer/apparmor github.com/opencontainers/runc/libcontainer/cgroups @@ -1482,7 +1482,6 @@ k8s.io/kubernetes/pkg/kubelet/container k8s.io/kubernetes/pkg/kubelet/cri/streaming k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand -k8s.io/kubernetes/pkg/kubelet/leaky k8s.io/kubernetes/pkg/kubelet/types k8s.io/kubernetes/pkg/proxy k8s.io/kubernetes/pkg/proxy/config
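One arithmetic detail worth spelling out from the workloads.bats helper earlier in this diff: on cgroup v2 the conmon cgroup exposes cpu.weight instead of cpu.shares, so the expected value is converted with the formula referenced in the test comment. A worked sketch:

package cgmgrsketch

// sharesToWeight maps a cgroup v1 cpu.shares value (2..262144) onto the
// cgroup v2 cpu.weight range (1..10000), mirroring the conversion the test
// helper applies before comparing against cpu.weight.
func sharesToWeight(shares uint64) uint64 {
	return 1 + ((shares-2)*9999)/262142
}

With the default of 200 shares used by these tests, this gives 1 + (198*9999)/262142 = 8, so the helper compares the conmon cgroup's cpu.weight against 8 rather than cpu.shares against 200.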