188 changes: 182 additions & 6 deletions internal/runtimehandlerhooks/high_performance_hooks.go
@@ -5,15 +5,19 @@ import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

"github.com/cri-o/cri-o/internal/log"

"github.com/cri-o/cri-o/internal/config/cgmgr"
"github.com/cri-o/cri-o/internal/lib/sandbox"
"github.com/cri-o/cri-o/internal/log"
"github.com/cri-o/cri-o/internal/oci"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
)
@@ -25,38 +29,117 @@ const (

const (
annotationCPULoadBalancing = "cpu-load-balancing.crio.io"
annotationCPUQuota = "cpu-quota.crio.io"
annotationIRQLoadBalancing = "irq-load-balancing.crio.io"
annotationTrue = "true"
schedDomainDir = "/proc/sys/kernel/sched_domain"
irqSmpAffinityProcFile = "/proc/irq/default_smp_affinity"
cgroupMountPoint = "/sys/fs/cgroup"
)

// HighPerformanceHooks is used to run additional hooks that configure a system for latency-sensitive workloads
type HighPerformanceHooks struct{}

func (h *HighPerformanceHooks) PreStart(ctx context.Context, c *oci.Container, s *sandbox.Sandbox) error {
log.Infof(ctx, "Run %q runtime handler pre-start hook for the container %q", HighPerformance, c.ID())

if isCgroupParentBurstable(s) {
log.Infof(ctx, "Container %q is a burstable pod. Skip PreStart.", c.ID())
return nil
}
if isCgroupParentBestEffort(s) {
log.Infof(ctx, "Container %q is a besteffort pod. Skip PreStart.", c.ID())
return nil
}
if !isContainerRequestWholeCPU(c) {
log.Infof(ctx, "Container %q requests partial cpu(s). Skip PreStart", c.ID())
return nil
}

// disable the CPU load balancing for the container CPUs
if shouldCPULoadBalancingBeDisabled(s.Annotations()) {
log.Infof(ctx, "Disable cpu load balancing for container %q", c.ID())
if err := setCPUSLoadBalancing(c, false, schedDomainDir); err != nil {
return errors.Wrap(err, "set CPU load balancing")
}
}
// disable the IRQ smp load balancing for the container CPUs
if shouldIRQLoadBalancingBeDisabled(s.Annotations()) {
log.Infof(ctx, "Disable irq smp balancing for container %q", c.ID())
if err := setIRQLoadBalancing(c, false, irqSmpAffinityProcFile); err != nil {
return errors.Wrap(err, "set IRQ load balancing")
}
}
// disable the CFS quota for the container CPUs
if shouldCPUQuotaBeDisabled(s.Annotations()) {
log.Infof(ctx, "Disable cpu cfs quota for container %q", c.ID())
cpuMountPoint, err := cgroups.FindCgroupMountpoint(cgroupMountPoint, "cpu")
if err != nil {
return err
}
if err := setCPUQuota(cpuMountPoint, s.CgroupParent(), c, false); err != nil {
return errors.Wrap(err, "set CPU CFS quota")
}
}

return nil
}
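
For illustration only (not part of this diff): cgroups.FindCgroupMountpoint, as called in the quota branch above, resolves the host mount point of the cpu controller. A minimal sketch, assuming a cgroup v1 host:

// Illustrative sketch, not part of high_performance_hooks.go; assumes cgroup v1.
package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/cgroups"
)

func main() {
	// On typical v1 hosts this prints something like "/sys/fs/cgroup/cpu,cpuacct";
	// on unified (cgroup v2) hosts there is no per-controller mount and an error is returned.
	mountPoint, err := cgroups.FindCgroupMountpoint("/sys/fs/cgroup", "cpu")
	fmt.Println(mountPoint, err)
}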

func (h *HighPerformanceHooks) PreStop(ctx context.Context, c *oci.Container, s *sandbox.Sandbox) error {
log.Infof(ctx, "Run %q runtime handler pre-stop hook for the container %q", HighPerformance, c.ID())

if isCgroupParentBurstable(s) {
log.Infof(ctx, "Container %q is a burstable pod. Skip PreStop.", c.ID())
return nil
}
if isCgroupParentBestEffort(s) {
log.Infof(ctx, "Container %q is a besteffort pod. Skip PreStop.", c.ID())
return nil
}
if !isContainerRequestWholeCPU(c) {
log.Infof(ctx, "Container %q requests partial cpu(s). Skip PreStop", c.ID())
return nil
}

// enable the CPU load balancing for the container CPUs
if shouldCPULoadBalancingBeDisabled(s.Annotations()) {
if err := setCPUSLoadBalancing(c, true, schedDomainDir); err != nil {
return errors.Wrap(err, "set CPU load balancing")
}
}
// enable the IRQ smp balancing for the container CPUs
if shouldIRQLoadBalancingBeDisabled(s.Annotations()) {
if err := setIRQLoadBalancing(c, true, irqSmpAffinityProcFile); err != nil {
return errors.Wrap(err, "set IRQ load balancing")
}
}
// no need to reverse the cgroup CPU CFS quota setting as the pod cgroup will be deleted anyway

return nil
}

func shouldCPULoadBalancingBeDisabled(annotations fields.Set) bool {
return annotations[annotationCPULoadBalancing] == annotationTrue
}

func shouldCPUQuotaBeDisabled(annotations fields.Set) bool {
return annotations[annotationCPUQuota] == annotationTrue
}

func shouldIRQLoadBalancingBeDisabled(annotations fields.Set) bool {
return annotations[annotationIRQLoadBalancing] == annotationTrue
}

func isCgroupParentBurstable(s *sandbox.Sandbox) bool {
return strings.Contains(s.CgroupParent(), "burstable")
}

func isCgroupParentBestEffort(s *sandbox.Sandbox) bool {
return strings.Contains(s.CgroupParent(), "besteffort")
}

func isContainerRequestWholeCPU(c *oci.Container) bool {
return *(c.Spec().Linux.Resources.CPU.Shares)%1024 == 0
}
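
A note on the whole-CPU check above, with an illustration that is not part of this diff: the kubelet derives cpu.shares from the CPU request as roughly milliCPU * 1024 / 1000, so a request for whole CPUs produces a multiple of 1024 while a fractional request does not. The helper name below is made up for this sketch.

// Illustrative sketch only: why Shares%1024 == 0 identifies whole-CPU requests.
package main

import "fmt"

// milliCPUToShares mirrors the usual kubelet conversion (hypothetical helper).
func milliCPUToShares(milliCPU int64) int64 {
	return milliCPU * 1024 / 1000
}

func main() {
	for _, req := range []int64{2000, 1000, 500} { // 2 CPUs, 1 CPU, 500m
		shares := milliCPUToShares(req)
		fmt.Printf("request=%dm shares=%d wholeCPU=%v\n", req, shares, shares%1024 == 0)
	}
}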

func setCPUSLoadBalancing(c *oci.Container, enable bool, schedDomainDir string) error {
@@ -65,7 +148,7 @@ func setCPUSLoadBalancing(c *oci.Container, enable bool, schedDomainDir string)
lspec.Resources == nil ||
lspec.Resources.CPU == nil ||
lspec.Resources.CPU.Cpus == "" {
return errors.Errorf("find container %s CPUs", c.ID())
}

cpus, err := cpuset.Parse(lspec.Resources.CPU.Cpus)
@@ -113,3 +196,96 @@ func setCPUSLoadBalancing(c *oci.Container, enable bool, schedDomainDir string)

return nil
}
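
The remainder of setCPUSLoadBalancing is collapsed in this diff. Purely as a hypothetical sketch (not the hidden hunk), toggling load balancing for a set of CPUs via the sched_domain interface generally means rewriting the flags files under /proc/sys/kernel/sched_domain/cpu<N>/domain<M>/. The SD_LOAD_BALANCE bit value below is an assumption that holds for older kernels which still expose a writable flags file; the sketch reuses the imports shown at the top of the file.

// Hypothetical sketch only; the PR's actual implementation is collapsed above.
func toggleSchedDomainLoadBalance(schedDomainDir string, cpus cpuset.CPUSet, enable bool) error {
	const sdLoadBalance = 0x1 // assumed SD_LOAD_BALANCE bit
	for _, cpu := range cpus.ToSlice() {
		pattern := filepath.Join(schedDomainDir, fmt.Sprintf("cpu%d", cpu), "domain*", "flags")
		paths, err := filepath.Glob(pattern)
		if err != nil {
			return err
		}
		for _, path := range paths {
			content, err := ioutil.ReadFile(path)
			if err != nil {
				return err
			}
			flags, err := strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64)
			if err != nil {
				return err
			}
			if enable {
				flags |= sdLoadBalance // let the scheduler balance tasks onto these CPUs again
			} else {
				flags &^= sdLoadBalance // exclude these CPUs from scheduler load balancing
			}
			if err := ioutil.WriteFile(path, []byte(strconv.FormatInt(flags, 10)), 0o644); err != nil {
				return err
			}
		}
	}
	return nil
}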

func setIRQLoadBalancing(c *oci.Container, enable bool, irqSmpAffinityFile string) error {
lspec := c.Spec().Linux
if lspec == nil ||
lspec.Resources == nil ||
lspec.Resources.CPU == nil ||
lspec.Resources.CPU.Cpus == "" {
return errors.Errorf("find container %s CPUs", c.ID())
}

content, err := ioutil.ReadFile(irqSmpAffinityFile)
if err != nil {
return err
}
currentIRQSMPSetting := strings.TrimSpace(string(content))
newIRQSMPSetting, newIRQBalanceSetting, err := UpdateIRQSmpAffinityMask(lspec.Resources.CPU.Cpus, currentIRQSMPSetting, enable)
if err != nil {
return err
}
if err := ioutil.WriteFile(irqSmpAffinityFile, []byte(newIRQSMPSetting), 0o644); err != nil {
return err
}
if _, err := exec.LookPath("irqbalance"); err != nil {
// irqbalance is not installed, skip the rest; pod should still start, so return nil instead
logrus.Warnf("irqbalance binary not found: %v", err)
return nil
}
// run irqbalance in one-shot mode; it rebalances once and exits instead of leaving a daemon running
cmd := exec.Command("irqbalance", "--oneshot")
Review thread on the irqbalance --oneshot call:

Member: If the irqbalance service already exists, then we may have to update the /etc/sysconfig/irqbalance config file with IRQBALANCE_BANNED_CPUS and restart the irqbalance service. We could run the irqbalance --oneshot command only if the service is not present (this is what I did here: https://github.com/pperiyasamy/irq-smp-balance/blob/main/pkg/irq/util.go#L94). Shouldn't it be done this way?

Reply: Yes, that would be nice; can you please open a PR so we can discuss it there?

Contributor Author: Yes, and we need to discuss whether/how to restore /etc/sysconfig/irqbalance to its default if the compute node is rebooted.

Reply: One additional challenge I see with the approach of reconfiguring the irqbalance service is its platform dependency. We have seen that Linux distributions such as SLES, Ubuntu, and RHEL all take slightly different approaches to configuring and managing the daemon, and even within one distribution the approach can change between major releases. Can we find a solution that works on all relevant platforms? Perhaps add parameters to the CRI-O config file to tell it which file to update and how to restart the service? If not, can the solution be split into a generic part inside CRI-O that manages a file on the host with the wanted banned CPUs, and a platform-specific daemon that reconfigures the host's irqbalance service accordingly?

Member (pperiyasamy, Dec 9, 2020): Yes, the irqbalance config lives in the /etc/sysconfig/ directory on SLES and CentOS, whereas on Ubuntu it is in /etc/default/. Hopefully this file is just a sourced file that takes entries of the form IRQBALANCE_BANNED_CPUS=<value>. Of course, we could pass the config file path through a new RuntimeConfig parameter and make it available to runtime_handler_hooks via Server.config. We could restore the irqbalance config at CRI-O start time, with the banned mask derived from /proc/irq/default_smp_affinity. Could we take this approach?

additionalEnv := "IRQBALANCE_BANNED_CPUS=" + newIRQBalanceSetting
cmd.Env = append(os.Environ(), additionalEnv)
return cmd.Run()
}
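
UpdateIRQSmpAffinityMask is defined elsewhere in this PR and is not shown in this file. As a rough sketch only, and not the helper the PR actually uses: assuming /proc/irq/default_smp_affinity holds a comma-separated hexadecimal bitmask with CPU 0 as the least significant bit, and that irqbalance accepts the same mask format for IRQBALANCE_BANNED_CPUS, the update amounts to clearing the container's CPU bits when balancing is disabled and setting them again when it is re-enabled. This sketch needs math/big in addition to the imports at the top of the file.

// Rough sketch only; not the UpdateIRQSmpAffinityMask used by this PR.
func updateIRQMaskSketch(containerCPUs, currentMask string, enable bool) (newMask, bannedMask string, err error) {
	cpus, err := cpuset.Parse(containerCPUs)
	if err != nil {
		return "", "", err
	}
	hexStr := strings.ReplaceAll(currentMask, ",", "")
	mask := new(big.Int)
	if _, ok := mask.SetString(hexStr, 16); !ok {
		return "", "", errors.Errorf("parse affinity mask %q", currentMask)
	}
	for _, cpu := range cpus.ToSlice() {
		bit := uint(0) // ban IRQs from this CPU
		if enable {
			bit = 1 // allow IRQs on this CPU again
		}
		mask.SetBit(mask, cpu, bit)
	}
	// Banned CPUs are the complement of the allowed mask over the same bit width.
	width := uint(len(hexStr) * 4)
	allOnes := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), width), big.NewInt(1))
	banned := new(big.Int).AndNot(allOnes, mask)
	// Comma grouping of the original /proc format is omitted for brevity.
	format := fmt.Sprintf("%%0%dx", len(hexStr))
	return fmt.Sprintf(format, mask), fmt.Sprintf(format, banned), nil
}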

func setCPUQuota(cpuMountPoint, parentDir string, c *oci.Container, enable bool) error {
var rpath string
var err error
var cfsQuotaPath string
var parentCfsQuotaPath string
var cgroupManager cgmgr.CgroupManager

if strings.HasSuffix(parentDir, ".slice") {
// systemd fs
if cgroupManager, err = cgmgr.SetCgroupManager("systemd"); err != nil {
return err
}
parentPath, err := systemd.ExpandSlice(parentDir)
if err != nil {
return err
}
parentCfsQuotaPath = filepath.Join(cpuMountPoint, parentPath, "cpu.cfs_quota_us")
if rpath, err = cgroupManager.ContainerCgroupAbsolutePath(parentDir, c.ID()); err != nil {
return err
}
cfsQuotaPath = filepath.Join(cpuMountPoint, rpath, "cpu.cfs_quota_us")
} else {
// cgroupfs
if cgroupManager, err = cgmgr.SetCgroupManager("cgroupfs"); err != nil {
return err
}
parentCfsQuotaPath = filepath.Join(cpuMountPoint, parentDir, "cpu.cfs_quota_us")
if rpath, err = cgroupManager.ContainerCgroupAbsolutePath(parentDir, c.ID()); err != nil {
return err
}
cfsQuotaPath = filepath.Join(cpuMountPoint, rpath, "cpu.cfs_quota_us")
}

if _, err := os.Stat(cfsQuotaPath); err != nil {
return err
}
if _, err := os.Stat(parentCfsQuotaPath); err != nil {
return err
}

if enable {
// there should be no use case that reaches this branch, as the pod cgroup is deleted when the pod ends
if err := ioutil.WriteFile(cfsQuotaPath, []byte("0"), 0o644); err != nil {
return err
}
if err := ioutil.WriteFile(parentCfsQuotaPath, []byte("0"), 0o644); err != nil {
return err
}
} else {
if err := ioutil.WriteFile(cfsQuotaPath, []byte("-1"), 0o644); err != nil {
return err
}
if err := ioutil.WriteFile(parentCfsQuotaPath, []byte("-1"), 0o644); err != nil {
return err
}
}

return nil
}
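
For illustration only (not part of this diff): with the systemd cgroup manager, ExpandSlice turns the nested slice name into the on-disk directory hierarchy that the cfs_quota paths above are built from; the slice name and mount point below are made up. Writing -1 to cpu.cfs_quota_us removes the CFS quota on cgroup v1, which is why the disable branch writes that value.

// Illustration only; the slice name and mount point are hypothetical.
package main

import (
	"fmt"
	"path/filepath"

	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
)

func main() {
	parentPath, err := systemd.ExpandSlice("kubepods-podabc.slice")
	if err != nil {
		panic(err)
	}
	// Prints "kubepods.slice/kubepods-podabc.slice": the nested slice becomes nested directories.
	fmt.Println(parentPath)
	// The parent quota file setCPUQuota writes would then live at roughly:
	fmt.Println(filepath.Join("/sys/fs/cgroup/cpu,cpuacct", parentPath, "cpu.cfs_quota_us"))
}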