Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 39 additions & 16 deletions internal/process/defunct_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,50 +16,60 @@ const ProcessFS = "/proc"

// Stat represents status information of a process from /proc/[pid]/stat.
type Stat struct {
// Pid is the PID of the process.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: missing period at the end of sentence (feel free to ignore).

Pid int

// Comm is the command name (usually the executable filename).
Comm string

// State is the state of the process.
State string

// PPid is the parent PID of the process.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

PPid int
}

// DefunctProcesses returns the number of zombie processes in the node.
func DefunctProcesses() (defunctCount uint, retErr error) {
return DefunctProcessesForPath(ProcessFS)
// ParseDefunctProcesses returns the number of defunct processes on the node,
// as well as the PIDs of the defunct children of the currently running process.
func ParseDefunctProcesses() (defunctCount uint, defunctChildren []int, retErr error) {
return ParseDefunctProcessesForPathAndParent(ProcessFS, os.Getpid())
}

// DefunctProcessesForPath retrieves the number of zombie processes from
// a specific process filesystem.
func DefunctProcessesForPath(path string) (defunctCount uint, retErr error) {
// ParseDefunctProcessesForPathAndParent retrieves the number of zombie processes from
// a specific process filesystem, as well as the PIDs of the defunct children of a given parent.
func ParseDefunctProcessesForPathAndParent(path string, parent int) (defunctCount uint, defunctChildren []int, retErr error) {
directories, err := os.Open(path)
if err != nil {
return 0, err
return 0, defunctChildren, err
}
defer directories.Close()

names, err := directories.Readdirnames(-1)
if err != nil {
return 0, err
return 0, defunctChildren, err
}

for _, name := range names {
// Processes have numeric names. If the name cannot
// be parsed to an int, it is not a process name.
if _, err := strconv.ParseInt(name, 10, 0); err != nil {
pid, err := strconv.ParseInt(name, 10, 0)
if err != nil {
continue
}

stat, err := processStats(path, name)
if err != nil {
logrus.Debugf("Failed to get the status of process with PID %s: %v", name, err)
continue
}
if stat.State == "Z" {
logrus.Warnf("Found defunct process with PID %s (%s)", name, stat.Comm)
defunctCount++
logrus.Warnf("Found defunct process with PID %s (%s)", name, stat.Comm)
if stat.PPid == parent {
defunctChildren = append(defunctChildren, int(pid))
}
}
}
return defunctCount, nil
return defunctCount, defunctChildren, nil
}

// processStats returns status information of a process as defined in /proc/[pid]/stat
Expand All @@ -73,21 +83,34 @@ func processStats(fsPath, pid string) (*Stat, error) {
// /proc/[PID]/stat format is described in proc(5). The second field is process name,
// enclosed in parentheses, and it can contain parentheses inside. No other fields
// can have parentheses, so look for the last ')'.
i := strings.LastIndexByte(data, ')')
if i <= 2 || i >= len(data)-1 {
commEnd := strings.LastIndexByte(data, ')')
if commEnd <= 2 || commEnd >= len(data)-1 {
return nil, errors.Errorf("invalid stat data (no comm): %q", data)
}

parts := strings.SplitN(data[:i], " (", 2)
parts := strings.SplitN(data[:commEnd], " (", 2)
if len(parts) != 2 {
return nil, errors.Errorf("invalid stat data (no comm): %q", data)
}

stateIdx := commEnd + 2

// the fourth field is PPid, and we can start looking after the space after State
ppidBegin := stateIdx + 2
ppidEnd := strings.IndexByte(data[ppidBegin:], ' ')

ppid, err := strconv.ParseInt(data[ppidBegin:ppidBegin+ppidEnd], 10, 0)
if err != nil {
return nil, errors.Errorf("invalid stat data (invalid ppid): %q", data[stateIdx+2:ppidEnd])
}

return &Stat{
// The command name is field 2.
Comm: parts[1],

// The state is field 3, which is the first two fields and a space after.
State: string(data[i+2]),
State: string(data[stateIdx]),

PPid: int(ppid),
}, nil
}
20 changes: 13 additions & 7 deletions internal/process/defunct_processes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,43 +11,49 @@ import (

// The actual test suite
var _ = t.Describe("Process", func() {
t.Describe("DefunctProcessesForPath", func() {
t.Describe("ParseDefunctProcessesForPathAndParent", func() {
Context("Should succeed", func() {
It("when given a valid path name and there are defunct processes", func() {
defunctCount, err := process.DefunctProcessesForPath("./testing/proc_success_1")
defunctCount, _, err := process.ParseDefunctProcessesForPathAndParent("./testing/proc_success_1", 0)

Expect(err).To(BeNil())
Expect(defunctCount).To(Equal(uint(7)))
})
It("to get children when given a valid path name and there are defunct processes", func() {
_, children, err := process.ParseDefunctProcessesForPathAndParent("./testing/proc_success_1", 1)

Expect(err).To(BeNil())
Expect(len(children)).To(Equal(2))
})
It("when given a valid path name but there are no defunct processes", func() {
defunctCount, err := process.DefunctProcessesForPath("./testing/proc_success_2")
defunctCount, _, err := process.ParseDefunctProcessesForPathAndParent("./testing/proc_success_2", 0)

Expect(err).To(BeNil())
Expect(defunctCount).To(Equal(uint(0)))
})
It("when given a valid path name but there are no processes", func() {
defunctCount, err := process.DefunctProcessesForPath("./testing/proc_success_3")
defunctCount, _, err := process.ParseDefunctProcessesForPathAndParent("./testing/proc_success_3", 0)

Expect(err).To(BeNil())
Expect(defunctCount).To(Equal(uint(0)))
})
It("when given a valid path name but there are no directories", func() {
defunctCount, err := process.DefunctProcessesForPath("./testing/proc_success_4")
defunctCount, _, err := process.ParseDefunctProcessesForPathAndParent("./testing/proc_success_4", 0)

Expect(err).To(BeNil())
Expect(defunctCount).To(Equal(uint(0)))
})
})
Context("Should fail", func() {
It("when given an invalid path name", func() {
defunctCount, err := process.DefunctProcessesForPath("./test/proc")
defunctCount, _, err := process.ParseDefunctProcessesForPathAndParent("./test/proc", 0)
formattedErr := fmt.Sprintf("%v", err)

Expect(formattedErr).To(Equal("open ./test/proc: no such file or directory"))
Expect(defunctCount).To(Equal(uint(0)))
})
It("when the given path name does not belong to a directory", func() {
defunctCount, err := process.DefunctProcessesForPath("./testing/proc_fail")
defunctCount, _, err := process.ParseDefunctProcessesForPathAndParent("./testing/proc_fail", 0)
formattedErr := fmt.Sprintf("%v", err)

Expect(formattedErr).To(Equal("readdirent ./testing/proc_fail: not a directory"))
Expand Down
53 changes: 53 additions & 0 deletions internal/process/zombie_monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package process

import (
"syscall"
"time"

"github.com/sirupsen/logrus"
)

// ZombieMonitor watches for and cleans up zombie (defunct) processes that are
// children of the currently running process.
// It does so by periodically polling for zombie processes.
// If any zombies are found, there is a delay between when they're identified and when they're reaped.
// This is to ensure ZombieMonitor doesn't interfere with the Go runtime's own child management.
type ZombieMonitor struct {
// closeChan carries the stop signal from Shutdown to the Start loop.
closeChan chan struct{}
}

// NewZombieMonitor allocates a ZombieMonitor, launches its polling loop in a
// background goroutine, and hands the running monitor back to the caller.
func NewZombieMonitor() *ZombieMonitor {
	zm := new(ZombieMonitor)
	// A capacity of one lets a stop signal be queued while the loop is busy.
	zm.closeChan = make(chan struct{}, 1)

	go zm.Start()

	return zm
}

// Shutdown instructs the zombie monitor to stop polling and exit.
//
// It never blocks and may be called more than once, or on a nil monitor.
// One pending signal is enough to make Start return, so a signal is queued
// only when none is already waiting in closeChan. This relies on closeChan
// being buffered, as NewZombieMonitor creates it.
func (zm *ZombieMonitor) Shutdown() {
	if zm == nil {
		return
	}

	select {
	case zm.closeChan <- struct{}{}:
	default:
		// A stop signal is already queued. A blocking send here would hang
		// forever once Start has returned and nobody reads the channel.
	}
}

// Start runs the zombie monitor loop until a shutdown signal is received.
//
// Each iteration scans for defunct children of the current process, waits
// defaultZombieChildReapPeriod (returning early if a shutdown signal arrives
// on closeChan), and then reaps the children found by that scan. The wait
// between identification and reaping gives the code that spawned a child the
// chance to collect it first.
func (zm *ZombieMonitor) Start() {
	for {
		// Only the PIDs of our own defunct children are needed here; the
		// node-wide count is discarded.
		_, zombieChildren, err := ParseDefunctProcesses()
		if err != nil {
			logrus.Warnf("Failed to get defunct process information: %v", err)
		}

		select {
		case <-zm.closeChan:
			// Since the process will soon shutdown, and its children will be reparented, no need to delay the shutdown to cleanup.
			return
		case <-time.After(defaultZombieChildReapPeriod):
		}

		for _, child := range zombieChildren {
			// WNOHANG: never block the monitor on a PID that is not waitable.
			_, err := syscall.Wait4(child, nil, syscall.WNOHANG, nil)
			switch {
			case err == nil:
				// Reaped, or nothing to collect for this PID right now.
			case err == syscall.ECHILD:
				// The child was collected by someone else during the delay,
				// which is exactly what the delay is meant to allow.
				logrus.Debugf("Child process %d was already reaped", child)
			default:
				logrus.Errorf("Failed to reap child process %d: %v", child, err)
			}
		}
	}
}
10 changes: 10 additions & 0 deletions internal/process/zombie_monitor_defaults.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//go:build !test
// +build !test

package process

import "time"

// defaultZombieChildReapPeriod is how long the zombie monitor waits
// before it reaps the defunct children it has found.
var defaultZombieChildReapPeriod = 5 * time.Minute
38 changes: 38 additions & 0 deletions internal/process/zombie_monitor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package process_test

import (
"os/exec"
"time"

"github.com/cri-o/cri-o/internal/process"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

// The actual test suite.
// It checks that a running ZombieMonitor eventually leaves the test process
// with no defunct children.
var _ = t.Describe("ZombieMonitor", func() {
It("should clean zombie", func() {
// createZombie starts a short-lived child and asserts it shows up as defunct.
cmd := createZombie()
// The Wait error is deliberately ignored (presumably because the monitor
// may already have reaped the child by the time this runs).
defer cmd.Wait() // nolint:errcheck

// NewZombieMonitor starts the polling loop in the background.
monitor := process.NewZombieMonitor()
defer monitor.Shutdown()

// Poll once per second, for up to ten seconds, until no defunct children remain.
Eventually(func() int {
_, defunctChildren, err := process.ParseDefunctProcesses()
Expect(err).To(BeNil())
return len(defunctChildren)
}, time.Second*10, time.Second).Should(Equal(0))
})
})

// createZombie starts a short-lived child process ("true") without waiting on
// it, then blocks until that child is reported as a defunct child of the test
// process. It returns the started command so the caller can Wait on it later.
func createZombie() *exec.Cmd {
	cmd := exec.Command("true")
	Expect(cmd.Start()).To(BeNil())

	// Start returns once the child is running, not once it has exited, so
	// the child may not be a zombie yet. Poll instead of checking once.
	Eventually(func() int {
		_, defunctChildren, err := process.ParseDefunctProcesses()
		Expect(err).To(BeNil())
		return len(defunctChildren)
	}, time.Second*5, time.Millisecond*100).Should(Equal(1))

	return cmd
}
13 changes: 13 additions & 0 deletions internal/process/zombie_monitor_test_inject.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//go:build test
// +build test

// All *_inject.go files are meant to be used by tests only. The purpose of
// these files is to provide a way to inject mocked data into the current setup.

package process

import "time"

// defaultZombieChildReapPeriod shortens the wait before defunct children
// are reaped, so that tests finish quickly.
var defaultZombieChildReapPeriod = 5 * time.Second
8 changes: 2 additions & 6 deletions server/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,8 @@ func New(config *libconfig.MetricsConfig) *Metrics {
Help: "Total number of defunct processes in the node",
},
func() float64 {
total, err := process.DefunctProcesses()
if err == nil {
return float64(total)
}
logrus.Warn(err)
return 0
count, _, _ := process.ParseDefunctProcesses() // nolint:errcheck
return float64(count)
},
),
}
Expand Down
6 changes: 6 additions & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/cri-o/cri-o/internal/lib/sandbox"
"github.com/cri-o/cri-o/internal/log"
"github.com/cri-o/cri-o/internal/oci"
"github.com/cri-o/cri-o/internal/process"
"github.com/cri-o/cri-o/internal/resourcestore"
"github.com/cri-o/cri-o/internal/runtimehandlerhooks"
"github.com/cri-o/cri-o/internal/storage"
Expand Down Expand Up @@ -74,6 +75,8 @@ type Server struct {
pullOperationsLock sync.Mutex

resourceStore *resourcestore.ResourceStore

zombieMonitor *process.ZombieMonitor
}

// pullArguments are used to identify a pullOperation via an input image name and
Expand Down Expand Up @@ -297,6 +300,7 @@ func (s *Server) restore(ctx context.Context) []string {
// Shutdown attempts to shut down the server's storage cleanly
func (s *Server) Shutdown(ctx context.Context) error {
s.resourceStore.Close()
s.zombieMonitor.Shutdown()

if err := s.ContainerServer.Shutdown(); err != nil {
return err
Expand Down Expand Up @@ -507,6 +511,8 @@ func New(
logrus.Debug("Metrics are disabled")
}

s.zombieMonitor = process.NewZombieMonitor()

return s, nil
}

Expand Down