Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"strconv"
"strings"
"syscall"
"time"

"github.com/containers/podman/v3/pkg/lookup"
"github.com/cri-o/cri-o/internal/dbusmgr"
Expand Down Expand Up @@ -54,9 +55,8 @@ func StatusToExitCode(status int) int {
}

// RunUnderSystemdScope adds the specified pid to a systemd scope
func RunUnderSystemdScope(mgr *dbusmgr.DbusConnManager, pid int, slice, unitName string, properties ...systemdDbus.Property) error {
func RunUnderSystemdScope(mgr *dbusmgr.DbusConnManager, pid int, slice, unitName string, properties ...systemdDbus.Property) (err error) {
ctx := context.Background()
var err error
// sanity check
if mgr == nil {
return errors.New("dbus manager is nil")
Expand All @@ -73,14 +73,24 @@ func RunUnderSystemdScope(mgr *dbusmgr.DbusConnManager, pid int, slice, unitName
ch := make(chan string)
if err := mgr.RetryOnDisconnect(func(c *systemdDbus.Conn) error {
_, err = c.StartTransientUnitContext(ctx, unitName, "replace", properties, ch)
return err
return errors.Wrap(err, "start transient unit")
}); err != nil {
return err
}

// Block until job is started
<-ch
close(ch)
select {
case <-ch:
close(ch)
case <-time.After(time.Minute * 6):
// This case is a work around to catch situations where the dbus library sends the
// request but it unexpectedly disappears. We set the timeout very high to make sure
// we wait as long as possible to catch situations where dbus is overwhelmed.
// We also don't use the native context cancelling behavior of the dbus library,
// because experience has shown that it does not help.
// TODO: Find cause of the request being dropped in the dbus library and fix it.
return errors.Errorf("timed out moving conmon with pid %d to cgroup", pid)
}
Comment on lines 74 to +93
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two ideas come to mind:

  • What if we reduce the timeout to 1 minute and retry multiple times with an exponential backoff?
  • What if we check for the error "Message recipient disconnected from message bus without replying" and restart dbus in that case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would we be able to get the error in this case?


return nil
}
Expand Down