added irq smp balance and cpu cfs quota control #4022
@@ -5,15 +5,19 @@ import (
	"fmt"
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"

-	"github.com/cri-o/cri-o/internal/log"
+	"github.com/cri-o/cri-o/internal/config/cgmgr"
+	"github.com/cri-o/cri-o/internal/lib/sandbox"
+	"github.com/cri-o/cri-o/internal/log"
+	"github.com/cri-o/cri-o/internal/oci"

	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
)
@@ -25,38 +29,117 @@ const (

const (
	annotationCPULoadBalancing = "cpu-load-balancing.crio.io"
	annotationCPUQuota         = "cpu-quota.crio.io"
	annotationIRQLoadBalancing = "irq-load-balancing.crio.io"
	annotationTrue             = "true"
	schedDomainDir             = "/proc/sys/kernel/sched_domain"
	irqSmpAffinityProcFile     = "/proc/irq/default_smp_affinity"
	cgroupMountPoint           = "/sys/fs/cgroup"
)

// HighPerformanceHooks runs additional hooks that configure the system for latency-sensitive workloads.
type HighPerformanceHooks struct{}

func (h *HighPerformanceHooks) PreStart(ctx context.Context, c *oci.Container, s *sandbox.Sandbox) error {
	log.Infof(ctx, "Run %q runtime handler pre-start hook for the container %q", HighPerformance, c.ID())

	if isCgroupParentBurstable(s) {
		log.Infof(ctx, "Container %q is a burstable pod. Skip PreStart.", c.ID())
		return nil
	}
	if isCgroupParentBestEffort(s) {
		log.Infof(ctx, "Container %q is a besteffort pod. Skip PreStart.", c.ID())
		return nil
	}
	if !isContainerRequestWholeCPU(c) {
		log.Infof(ctx, "Container %q requests partial cpu(s). Skip PreStart.", c.ID())
		return nil
	}

	// disable the CPU load balancing for the container CPUs
	if shouldCPULoadBalancingBeDisabled(s.Annotations()) {
		log.Infof(ctx, "Disable cpu load balancing for container %q", c.ID())
		if err := setCPUSLoadBalancing(c, false, schedDomainDir); err != nil {
			return errors.Wrap(err, "set CPU load balancing")
		}
	}
	// disable the IRQ smp load balancing for the container CPUs
	if shouldIRQLoadBalancingBeDisabled(s.Annotations()) {
		log.Infof(ctx, "Disable irq smp balancing for container %q", c.ID())
		if err := setIRQLoadBalancing(c, false, irqSmpAffinityProcFile); err != nil {
			return errors.Wrap(err, "set IRQ load balancing")
		}
	}
	// disable the CFS quota for the container CPUs
	if shouldCPUQuotaBeDisabled(s.Annotations()) {
		log.Infof(ctx, "Disable cpu cfs quota for container %q", c.ID())
		cpuMountPoint, err := cgroups.FindCgroupMountpoint(cgroupMountPoint, "cpu")
		if err != nil {
			return err
		}
		if err := setCPUQuota(cpuMountPoint, s.CgroupParent(), c, false); err != nil {
			return errors.Wrap(err, "set CPU CFS quota")
		}
	}

	return nil
}
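For reference, the three annotations are read straight off the sandbox, and the predicate helpers defined further down only match the literal string "true". A minimal fragment illustrating a pod that opts in to all three behaviors (it reuses this file's constants and helpers; the function name is ours, for illustration only):

// Fragment reusing this file's imports and constants; the values shown are
// what a pod would have to set to opt in to all three behaviors.
func exampleAnnotationsOptIn() {
	annotations := fields.Set{
		annotationCPULoadBalancing: annotationTrue, // "cpu-load-balancing.crio.io" = "true"
		annotationCPUQuota:         annotationTrue, // "cpu-quota.crio.io" = "true"
		annotationIRQLoadBalancing: annotationTrue, // "irq-load-balancing.crio.io" = "true"
	}

	_ = shouldCPULoadBalancingBeDisabled(annotations) // true
	_ = shouldCPUQuotaBeDisabled(annotations)         // true
	_ = shouldIRQLoadBalancingBeDisabled(annotations) // true
}

Any other value, or a missing key, leaves the corresponding behavior untouched.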
func (h *HighPerformanceHooks) PreStop(ctx context.Context, c *oci.Container, s *sandbox.Sandbox) error {
	log.Infof(ctx, "Run %q runtime handler pre-stop hook for the container %q", HighPerformance, c.ID())

	if isCgroupParentBurstable(s) {
		log.Infof(ctx, "Container %q is a burstable pod. Skip PreStop.", c.ID())
		return nil
	}
	if isCgroupParentBestEffort(s) {
		log.Infof(ctx, "Container %q is a besteffort pod. Skip PreStop.", c.ID())
		return nil
	}
	if !isContainerRequestWholeCPU(c) {
		log.Infof(ctx, "Container %q requests partial cpu(s). Skip PreStop.", c.ID())
		return nil
	}

	// enable the CPU load balancing for the container CPUs
	if shouldCPULoadBalancingBeDisabled(s.Annotations()) {
		if err := setCPUSLoadBalancing(c, true, schedDomainDir); err != nil {
-			return err
+			return errors.Wrap(err, "set CPU load balancing")
		}
	}
	// enable the IRQ smp balancing for the container CPUs
	if shouldIRQLoadBalancingBeDisabled(s.Annotations()) {
		if err := setIRQLoadBalancing(c, true, irqSmpAffinityProcFile); err != nil {
			return errors.Wrap(err, "set IRQ load balancing")
		}
	}
	// no need to reverse the cgroup CPU CFS quota setting, as the pod cgroup will be deleted anyway

	return nil
}
func shouldCPULoadBalancingBeDisabled(annotations fields.Set) bool {
-	return annotations[annotationCPULoadBalancing] == "true"
+	return annotations[annotationCPULoadBalancing] == annotationTrue
}

func shouldCPUQuotaBeDisabled(annotations fields.Set) bool {
	return annotations[annotationCPUQuota] == annotationTrue
}

func shouldIRQLoadBalancingBeDisabled(annotations fields.Set) bool {
	return annotations[annotationIRQLoadBalancing] == annotationTrue
}

func isCgroupParentBurstable(s *sandbox.Sandbox) bool {
	return strings.Contains(s.CgroupParent(), "burstable")
}

func isCgroupParentBestEffort(s *sandbox.Sandbox) bool {
	return strings.Contains(s.CgroupParent(), "besteffort")
}

func isContainerRequestWholeCPU(c *oci.Container) bool {
	return *(c.Spec().Linux.Resources.CPU.Shares)%1024 == 0
}
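The whole-CPU check works because the kubelet derives cgroup CPU shares from the CPU request as milliCPU * 1024 / 1000, so only whole-CPU requests produce exact multiples of 1024. A small self-contained illustration (the helper name is ours, not the kubelet's, and it omits the kubelet's minimum-shares clamp):

package main

import "fmt"

// milliCPUToShares mirrors the kubelet's conversion from a CPU request
// in milliCPU to cgroup CPU shares (minimum clamp omitted for brevity).
func milliCPUToShares(milliCPU int64) int64 {
	return milliCPU * 1024 / 1000
}

func main() {
	for _, m := range []int64{500, 1000, 1500, 2000} {
		shares := milliCPUToShares(m)
		fmt.Printf("%dm -> shares=%d, whole CPU: %v\n", m, shares, shares%1024 == 0)
	}
	// 500m  -> shares=512,  whole CPU: false
	// 1000m -> shares=1024, whole CPU: true
	// 1500m -> shares=1536, whole CPU: false
	// 2000m -> shares=2048, whole CPU: true
}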
func setCPUSLoadBalancing(c *oci.Container, enable bool, schedDomainDir string) error {

@@ -65,7 +148,7 @@ func setCPUSLoadBalancing(c *oci.Container, enable bool, schedDomainDir string)
		lspec.Resources == nil ||
		lspec.Resources.CPU == nil ||
		lspec.Resources.CPU.Cpus == "" {
-		return fmt.Errorf("failed to find the container %q CPUs", c.ID())
+		return errors.Errorf("find container %s CPUs", c.ID())
	}

	cpus, err := cpuset.Parse(lspec.Resources.CPU.Cpus)
@@ -113,3 +196,96 @@ func setCPUSLoadBalancing(c *oci.Container, enable bool, schedDomainDir string)

	return nil
}
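The body of setCPUSLoadBalancing is elided by the diff above. For orientation, a sketch of the general technique such a function relies on: toggling the SD_LOAD_BALANCE bit (0x1) in each CPU's /proc/sys/kernel/sched_domain/cpuN/domainM/flags files. This is an assumption-laden sketch, not the PR's actual elided body, and it reuses the file's imports:

// Sketch only: flip SD_LOAD_BALANCE (bit 0) for one CPU's scheduling
// domains. Real code would iterate every CPU in the container's cpuset.
func setSchedDomainLoadBalance(schedDomainDir string, cpu int, enable bool) error {
	cpuDir := filepath.Join(schedDomainDir, fmt.Sprintf("cpu%d", cpu))
	domains, err := ioutil.ReadDir(cpuDir)
	if err != nil {
		return err
	}
	for _, domain := range domains {
		flagsPath := filepath.Join(cpuDir, domain.Name(), "flags")
		content, err := ioutil.ReadFile(flagsPath)
		if err != nil {
			return err
		}
		flags, err := strconv.Atoi(strings.TrimSpace(string(content)))
		if err != nil {
			return err
		}
		if enable {
			flags |= 0x1 // set SD_LOAD_BALANCE
		} else {
			flags &^= 0x1 // clear SD_LOAD_BALANCE
		}
		if err := ioutil.WriteFile(flagsPath, []byte(strconv.Itoa(flags)), 0o644); err != nil {
			return err
		}
	}
	return nil
}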
func setIRQLoadBalancing(c *oci.Container, enable bool, irqSmpAffinityFile string) error {
	lspec := c.Spec().Linux
	if lspec == nil ||
		lspec.Resources == nil ||
		lspec.Resources.CPU == nil ||
		lspec.Resources.CPU.Cpus == "" {
		return errors.Errorf("find container %s CPUs", c.ID())
	}

	content, err := ioutil.ReadFile(irqSmpAffinityFile)
	if err != nil {
		return err
	}
	currentIRQSMPSetting := strings.TrimSpace(string(content))
	newIRQSMPSetting, newIRQBalanceSetting, err := UpdateIRQSmpAffinityMask(lspec.Resources.CPU.Cpus, currentIRQSMPSetting, enable)
	if err != nil {
		return err
	}
	if err := ioutil.WriteFile(irqSmpAffinityFile, []byte(newIRQSMPSetting), 0o644); err != nil {
		return err
	}
	if _, err := exec.LookPath("irqbalance"); err != nil {
		// irqbalance is not installed; skip the rest. The pod should still start, so return nil instead of the error.
		logrus.Warnf("irqbalance binary not found: %v", err)
		return nil
	}
	// run irqbalance in one-shot mode; it exits after a single rebalance, so this won't delay container start
	cmd := exec.Command("irqbalance", "--oneshot")
Review thread on this line:

Member: If the irqbalance service already exists, then we may have to update the /etc/sysconfig/irqbalance config file with IRQBALANCE_BANNED_CPUS and restart the irqbalance service. We could run the irqbalance --oneshot command only if the service is not present (this is what I did here: https://github.com/pperiyasamy/irq-smp-balance/blob/main/pkg/irq/util.go#L94).

Reply: Yes, that could be nice; can you please open the PR and we will discuss it.

Author (Contributor): Yes, and we need to discuss whether/how to restore /etc/sysconfig/irqbalance to its default if the compute node is rebooted.

Reply: One additional challenge I see with the approach of reconfiguring the irqbalance service is its platform dependency. We have seen that Linux distributions like SLES, Ubuntu, and RHEL all have slightly different approaches to configuring and managing the daemon, and even within one distribution the approach can change between major releases. Can we find a solution that will work on all relevant platforms? Perhaps add parameters to the CRI-O config file that tell it which file to update and how to restart the service? If not, can the solution be split into a generic part inside CRI-O that manages a file on the host with the wanted banned CPUs, and a platform-specific daemon that reconfigures the host's irqbalance service accordingly?

Member: Yes, the irqbalance config would be present in the /etc/sysconfig/ directory on SLES and CentOS platforms, whereas on Ubuntu it's present in the /etc/default/ directory. Hope this file is just a source file which takes in the format of …
	additionalEnv := "IRQBALANCE_BANNED_CPUS=" + newIRQBalanceSetting
	cmd.Env = append(os.Environ(), additionalEnv)
	return cmd.Run()
}
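The review thread above sketches an alternative for hosts where irqbalance runs as a managed service. A rough illustration only, not part of this PR: the systemctl-based detection, the config path, and the restart step are all assumptions that, as the thread notes, vary by distribution. The fragment reuses this file's imports:

// Sketch of the alternative discussed above. /etc/sysconfig/irqbalance is
// the RHEL/CentOS/SLES location; Ubuntu uses /etc/default/irqbalance.
func restartIRQBalanceWithBannedCPUs(bannedCPUMask string) error {
	const irqBalanceConfig = "/etc/sysconfig/irqbalance"

	// If irqbalance is not active as a service, a one-shot run is enough.
	if err := exec.Command("systemctl", "is-active", "--quiet", "irqbalance").Run(); err != nil {
		cmd := exec.Command("irqbalance", "--oneshot")
		cmd.Env = append(os.Environ(), "IRQBALANCE_BANNED_CPUS="+bannedCPUMask)
		return cmd.Run()
	}

	// Otherwise rewrite IRQBALANCE_BANNED_CPUS in the config and restart the daemon.
	input, err := ioutil.ReadFile(irqBalanceConfig)
	if err != nil {
		return err
	}
	lines := strings.Split(string(input), "\n")
	found := false
	for i, line := range lines {
		if strings.HasPrefix(line, "IRQBALANCE_BANNED_CPUS=") {
			lines[i] = "IRQBALANCE_BANNED_CPUS=" + bannedCPUMask
			found = true
		}
	}
	if !found {
		lines = append(lines, "IRQBALANCE_BANNED_CPUS="+bannedCPUMask)
	}
	if err := ioutil.WriteFile(irqBalanceConfig, []byte(strings.Join(lines, "\n")), 0o644); err != nil {
		return err
	}
	return exec.Command("systemctl", "restart", "irqbalance").Run()
}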
func setCPUQuota(cpuMountPoint, parentDir string, c *oci.Container, enable bool) error {
	var rpath string
	var err error
	var cfsQuotaPath string
	var parentCfsQuotaPath string
	var cgroupManager cgmgr.CgroupManager

	if strings.HasSuffix(parentDir, ".slice") {
		// systemd fs
		if cgroupManager, err = cgmgr.SetCgroupManager("systemd"); err != nil {
			return err
		}
		parentPath, err := systemd.ExpandSlice(parentDir)
		if err != nil {
			return err
		}
		parentCfsQuotaPath = filepath.Join(cpuMountPoint, parentPath, "cpu.cfs_quota_us")
		if rpath, err = cgroupManager.ContainerCgroupAbsolutePath(parentDir, c.ID()); err != nil {
			return err
		}
		cfsQuotaPath = filepath.Join(cpuMountPoint, rpath, "cpu.cfs_quota_us")
	} else {
		// cgroupfs
		if cgroupManager, err = cgmgr.SetCgroupManager("cgroupfs"); err != nil {
			return err
		}
		parentCfsQuotaPath = filepath.Join(cpuMountPoint, parentDir, "cpu.cfs_quota_us")
		if rpath, err = cgroupManager.ContainerCgroupAbsolutePath(parentDir, c.ID()); err != nil {
			return err
		}
		cfsQuotaPath = filepath.Join(cpuMountPoint, rpath, "cpu.cfs_quota_us")
	}

	if _, err := os.Stat(cfsQuotaPath); err != nil {
		return err
	}
	if _, err := os.Stat(parentCfsQuotaPath); err != nil {
		return err
	}

	if enable {
		// there should be no use case that reaches this branch, as the pod cgroup will be deleted when the pod ends
		if err := ioutil.WriteFile(cfsQuotaPath, []byte("0"), 0o644); err != nil {
			return err
		}
		if err := ioutil.WriteFile(parentCfsQuotaPath, []byte("0"), 0o644); err != nil {
			return err
		}
	} else {
		if err := ioutil.WriteFile(cfsQuotaPath, []byte("-1"), 0o644); err != nil {
			return err
		}
		if err := ioutil.WriteFile(parentCfsQuotaPath, []byte("-1"), 0o644); err != nil {
			return err
		}
	}

	return nil
}
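To make the systemd branch concrete: ExpandSlice (from the runc systemd package imported above) turns a slice name into its nested cgroupfs path, which is then joined with the controller mount point. The slice name below is made up for illustration, and the "cpu,cpuacct" mount is only the typical location returned by FindCgroupMountpoint:

// Fragment reusing this file's imports; the slice name is hypothetical.
func exampleQuotaPath() (string, error) {
	parentDir := "kubepods-pod1234_abcd.slice" // hypothetical pod slice

	parentPath, err := systemd.ExpandSlice(parentDir)
	if err != nil {
		return "", err
	}
	// parentPath is the nested cgroupfs path, e.g.
	// "kubepods.slice/kubepods-pod1234_abcd.slice", so with a typical
	// "cpu" controller mount the pod-level quota file resolves to
	// /sys/fs/cgroup/cpu,cpuacct/kubepods.slice/kubepods-pod1234_abcd.slice/cpu.cfs_quota_us
	return filepath.Join("/sys/fs/cgroup/cpu,cpuacct", parentPath, "cpu.cfs_quota_us"), nil
}

Writing "-1" to both the container-level and pod-level cpu.cfs_quota_us files is what actually lifts the CFS bandwidth limit for the container's CPUs.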