diff --git a/internal/findprocess/findprocess.go b/internal/findprocess/findprocess.go deleted file mode 100644 index 40887b32f06..00000000000 --- a/internal/findprocess/findprocess.go +++ /dev/null @@ -1,30 +0,0 @@ -// Package findprocess provides an os.FindProcess wrapper that -// portably detects non-existent processes. -package findprocess - -import ( - "os" - - "github.com/pkg/errors" -) - -// ErrNotFound represents a target process that does not exist or is -// otherwise not available to the calling process. -var ErrNotFound = errors.New("process not found") - -// FindProcess wraps os.Findprocess [1] to return a public ErrNotFound -// if the process does not exist. The returned process will be nil if -// and only if the returned err is non-nil. -// -// [1]: https://golang.org/pkg/os/#FindProcess -func FindProcess(pid int) (*os.Process, error) { - process, err := findProcess(pid) - if err != nil { - releaseErr := process.Release() - process = nil - if releaseErr != nil { - return process, errors.Wrap(err, releaseErr.Error()) - } - } - return process, err -} diff --git a/internal/findprocess/findprocess_test.go b/internal/findprocess/findprocess_test.go deleted file mode 100644 index 9be500b9e77..00000000000 --- a/internal/findprocess/findprocess_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package findprocess_test - -import ( - "os/exec" - "testing" - - "github.com/cri-o/cri-o/internal/findprocess" - . "github.com/cri-o/cri-o/test/framework" - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" -) - -// TestFindprocess runs the created specs -func TestFindprocess(t *testing.T) { - RegisterFailHandler(Fail) - RunFrameworkSpecs(t, "Findprocess") -} - -var t *TestFramework - -var _ = BeforeSuite(func() { - t = NewTestFramework(NilFunc, NilFunc) - t.Setup() -}) - -var _ = AfterSuite(func() { - t.Teardown() -}) - -// The actual test suite -var _ = t.Describe("Findprocess", func() { - It("should succeed to find an existing process", func() { - // Given - cmd := exec.Command("sleep", "1") - Expect(cmd.Start()).To(BeNil()) - - // When - process, err := findprocess.FindProcess(cmd.Process.Pid) - - // Then - Expect(err).To(BeNil()) - Expect(process).NotTo(BeNil()) - }) - - It("should fail to find an already released process", func() { - // Given - // When - process, err := findprocess.FindProcess(-1) - - // Then - Expect(err).NotTo(BeNil()) - Expect(err.Error()).To(ContainSubstring("process already released")) - Expect(process).To(BeNil()) - }) - - It("should fail to find an already finished process", func() { - // Given - cmd := exec.Command("echo") - Expect(cmd.Start()).To(BeNil()) - Expect(cmd.Wait()).To(BeNil()) - - // When - process, err := findprocess.FindProcess(cmd.Process.Pid) - - // Then - Expect(err).NotTo(BeNil()) - Expect(err).To(Equal(findprocess.ErrNotFound)) - Expect(process).To(BeNil()) - }) -}) diff --git a/internal/findprocess/findprocess_unix.go b/internal/findprocess/findprocess_unix.go deleted file mode 100644 index 4dca13357ff..00000000000 --- a/internal/findprocess/findprocess_unix.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build !windows - -package findprocess - -import ( - "os" - "syscall" -) - -func findProcess(pid int) (*os.Process, error) { - process, err := os.FindProcess(pid) - if err != nil { - return process, err - } - - err = process.Signal(syscall.Signal(0)) - if err == nil { - return process, nil - } - if err.Error() == "os: process already finished" { - return process, ErrNotFound - } - return process, err -} diff --git a/internal/findprocess/findprocess_windows.go b/internal/findprocess/findprocess_windows.go deleted file mode 100644 index 3f1dbba63f0..00000000000 --- a/internal/findprocess/findprocess_windows.go +++ /dev/null @@ -1,14 +0,0 @@ -package findprocess - -import ( - "os" -) - -func findProcess(pid int) (*os.Process, error) { - process, err := os.FindProcess(pid) - if err != nil { - // FIXME: is there an analog to POSIX's ESRCH we can check for? - return process, err - } - return process, nil -} diff --git a/internal/lib/sandbox/namespaces.go b/internal/lib/sandbox/namespaces.go index e0e798c1167..968f9f36995 100644 --- a/internal/lib/sandbox/namespaces.go +++ b/internal/lib/sandbox/namespaces.go @@ -18,6 +18,7 @@ const ( IPCNS NSType = "ipc" UTSNS NSType = "uts" USERNS NSType = "user" + PIDNS NSType = "pid" numNamespaces = 4 ) @@ -274,6 +275,14 @@ func (s *Sandbox) UserNsJoin(nspath string) error { return err } +// PidNs specific functions + +// PidNsPath returns the path to the pid namespace of the sandbox. +// If the sandbox uses the host namespace, the empty string is returned. +func (s *Sandbox) PidNsPath() string { + return s.nsPath(nil, PIDNS) +} + // nsJoin checks if the current iface is nil, and if so gets the namespace at nsPath func nsJoin(nsPath string, nsType NSType, currentIface NamespaceIface) (NamespaceIface, error) { if currentIface != nil { @@ -294,7 +303,11 @@ func (s *Sandbox) nsPath(ns NamespaceIface, nsType NSType) string { func infraPid(infra *oci.Container) int { pid := -1 if infra != nil { - pid = infra.State().Pid + var err error + pid, err = infra.Pid() + if err != nil { + logrus.Errorf("pid for infra container %s not found: %v", infra.ID(), err) + } } return pid } @@ -303,9 +316,9 @@ func infraPid(infra *oci.Container) int { // calling a container.State() in batch operations func nsPathGivenInfraPid(ns NamespaceIface, nsType NSType, infraPid int) string { // caller is responsible for checking if infraContainer - // is valid. If not, infraPid should be negative + // is valid. If not, infraPid should be less than or equal to 0 if ns == nil || ns.Get() == nil { - if infraPid >= 0 { + if infraPid > 0 { return infraNsPath(nsType, infraPid) } return "" diff --git a/internal/lib/sandbox/namespaces_test.go b/internal/lib/sandbox/namespaces_test.go index 6a32012ae6e..495c6dacd6e 100644 --- a/internal/lib/sandbox/namespaces_test.go +++ b/internal/lib/sandbox/namespaces_test.go @@ -342,6 +342,13 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { // Then Expect(ns).To(Equal("")) }) + It("should get nothing when pid not set", func() { + // Given + // When + ns := testSandbox.PidNsPath() + // Then + Expect(ns).To(Equal("")) + }) It("should get something when network is set", func() { // Given managedNamespaces := []sandbox.NSType{"net"} @@ -427,6 +434,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { nsPaths := testSandbox.NamespacePaths() // Then Expect(len(nsPaths)).To(Equal(0)) + Expect(testSandbox.PidNsPath()).To(BeEmpty()) }) It("should get something when infra set and pid running", func() { // Given @@ -438,6 +446,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { Expect(ns.Path()).To(ContainSubstring("/proc")) } Expect(len(nsPaths)).To(Equal(numManagedNamespaces)) + Expect(testSandbox.PidNsPath()).To(ContainSubstring("/proc")) }) It("should get nothing when infra set with pid not running", func() { // Given @@ -447,8 +456,9 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { nsPaths := testSandbox.NamespacePaths() // Then Expect(len(nsPaths)).To(Equal(0)) + Expect(testSandbox.PidNsPath()).To(BeEmpty()) }) - It("should get managed path despite infra set", func() { + It("should get managed path (except pid) despite infra set", func() { // Given setupInfraContainerWithPid(1) getPath := pinNamespacesFunctor{ @@ -468,6 +478,8 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { Expect(ns.Path()).NotTo(ContainSubstring("/proc")) } Expect(len(nsPaths)).To(Equal(numManagedNamespaces)) + + Expect(testSandbox.PidNsPath()).To(ContainSubstring("/proc")) }) }) t.Describe("NamespacePaths without infra", func() { @@ -495,6 +507,8 @@ func setupInfraContainerWithPid(pid int) { cstate.State = specs.State{ Pid: pid, } + // eat error here because callers may send invalid pids to test against + _ = cstate.SetInitPid(pid) // nolint:errcheck testContainer.SetState(cstate) Expect(testSandbox.SetInfraContainer(testContainer)).To(BeNil()) diff --git a/internal/oci/container.go b/internal/oci/container.go index 18d0ccee300..be82aec704f 100644 --- a/internal/oci/container.go +++ b/internal/oci/container.go @@ -27,6 +27,7 @@ const defaultStopSignalInt = 15 var ( defaultStopSignal = strconv.Itoa(defaultStopSignalInt) ErrContainerStopped = errors.New("container is already stopped") + ErrNotFound = errors.New("container process not found") ) // Container represents a runtime container. @@ -78,6 +79,11 @@ type ContainerState struct { ExitCode *int32 `json:"exitCode,omitempty"` OOMKilled bool `json:"oomKilled,omitempty"` Error string `json:"error,omitempty"` + InitPid int `json:"initPid,omitempty"` + // The unix start time of the container's init PID. + // This is used to track whether the PID we have stored + // is the same as the corresponding PID on the host. + InitStartTime int `json:"initStartTime,omitempty"` } // NewContainer creates a container object. @@ -154,6 +160,12 @@ func (c *Container) StopSignal() syscall.Signal { } // FromDisk restores container's state from disk +// Calls to FromDisk should always be preceded by call to Runtime.UpdateContainerStatus. +// This is because FromDisk() initializes the InitStartTime for the saved container state +// when CRI-O is being upgraded to a version that supports tracking PID, +// but does no verification the container is actually still running. If we assume the container +// is still running, we could incorrectly think a process with the same PID running on the host +// is our container. A call to `$runtime state` will protect us against this. func (c *Container) FromDisk() error { jsonSource, err := os.Open(c.StatePath()) if err != nil { @@ -162,7 +174,37 @@ func (c *Container) FromDisk() error { defer jsonSource.Close() dec := json.NewDecoder(jsonSource) - return dec.Decode(c.state) + tmpState := &ContainerState{} + if err := dec.Decode(tmpState); err != nil { + return err + } + + // this is to handle the situation in which we're upgrading + // versions of cri-o, and we didn't used to have this information in the state + if tmpState.InitPid == 0 && tmpState.InitStartTime == 0 && tmpState.Pid != 0 { + if err := tmpState.SetInitPid(tmpState.Pid); err != nil { + return err + } + logrus.Infof("PID information for container %s updated to %d %d", c.id, tmpState.InitPid, tmpState.InitStartTime) + } + c.state = tmpState + return nil +} + +// SetInitPid initializes the InitPid and InitStartTime for the container state +// given a PID. +// These values should be set once, and not changed again. +func (cstate *ContainerState) SetInitPid(pid int) error { + if cstate.InitPid != 0 || cstate.InitStartTime != 0 { + return errors.Errorf("pid and start time already initialized: %d %d", cstate.InitPid, cstate.InitStartTime) + } + cstate.InitPid = pid + startTime, err := getPidStartTime(pid) + if err != nil { + return err + } + cstate.InitStartTime = startTime + return nil } // StatePath returns the containers state.json path @@ -346,6 +388,74 @@ func (c *Container) exitFilePath() string { return filepath.Join(c.dir, "exit") } +// IsAlive is a function that checks if a container's init PID exists. +// It is used to check a container state when we don't want a `$runtime state` call +func (c *Container) IsAlive() bool { + _, err := c.pid() + if err != nil { + logrus.Errorf("checking if PID of %s is running failed: %v", c.id, err) + return false + } + + return true +} + +// Pid returns the container's init PID. +// It will fail if the saved PID no longer belongs to the container. +func (c *Container) Pid() (int, error) { + c.opLock.Lock() + defer c.opLock.Unlock() + return c.pid() +} + +// pid returns the container's init PID. +// It checks that we have an InitPid defined in the state, that PID can be found +// and it is the same process that was originally started by the runtime. +func (c *Container) pid() (int, error) { + if c.state == nil { + return 0, errors.New("state not initialized") + } + if c.state.InitPid <= 0 { + return 0, errors.New("PID not initialized") + } + + // container has stopped (as pid is initialized but the runc state has overwritten it) + if c.state.Pid == 0 { + return 0, ErrNotFound + } + + if err := c.verifyPid(); err != nil { + return 0, err + } + return c.state.InitPid, nil +} + +// verifyPid checks that the start time for the process on the node is the same +// as the start time we saved after creating the container. +// This is the simplest way to verify we are operating on the container +// process, and haven't run into PID wrap. +func (c *Container) verifyPid() error { + startTime, err := getPidStartTime(c.state.InitPid) + if err != nil { + return err + } + + if startTime != c.state.InitStartTime { + return errors.New("PID running but not the original container. PID wrap may have occurred") + } + return nil +} + +// getPidStartTime reads the kernel's /proc entry for stime for PID. +func getPidStartTime(pid int) (int, error) { + var st unix.Stat_t + if err := unix.Stat(fmt.Sprintf("/proc/%d", pid), &st); err != nil { + return 0, errors.Wrapf(ErrNotFound, err.Error()) + } + + return int(st.Ctim.Sec), nil +} + // ShouldBeStopped checks whether the container state is in a place // where attempting to stop it makes sense // a container is not stoppable if it's paused or stopped diff --git a/internal/oci/container_test.go b/internal/oci/container_test.go index 94f55940ef7..fd031d2188d 100644 --- a/internal/oci/container_test.go +++ b/internal/oci/container_test.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "os" "path" + "strconv" "time" "github.com/containers/storage/pkg/idtools" @@ -12,9 +13,15 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" pb "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) +const ( + neverRunningPid = 4194305 + alwaysRunningPid = 1 +) + // The actual test suite var _ = t.Describe("Container", func() { // The system under test @@ -168,74 +175,273 @@ var _ = t.Describe("Container", func() { Expect(signal).To(Equal("5")) }) - It("should succeed to get the state from disk", func() { - // Given - Expect(os.MkdirAll(sut.Dir(), 0o755)).To(BeNil()) - Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), - []byte("{}"), 0o644)).To(BeNil()) - defer os.RemoveAll(sut.Dir()) - - // When - err := sut.FromDisk() - - // Then - Expect(err).To(BeNil()) + t.Describe("FromDisk", func() { + BeforeEach(func() { + Expect(os.MkdirAll(sut.Dir(), 0o755)).To(BeNil()) + }) + AfterEach(func() { + os.RemoveAll(sut.Dir()) + }) + It("should succeed to get the state from disk", func() { + // Given + Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), + []byte("{}"), 0o644)).To(BeNil()) + + // When + err := sut.FromDisk() + + // Then + Expect(err).To(BeNil()) + }) + It("should succeed when pid set but initialPid not set", func() { + // Given + Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), []byte(` + {"pid":`+strconv.Itoa(alwaysRunningPid)+`}`), + 0o644)).To(BeNil()) + + // When + err := sut.FromDisk() + + // Then + Expect(err).To(BeNil()) + sutState := sut.State() + Expect(sutState.InitStartTime).NotTo(Equal(0)) + Expect(sutState.InitPid).To(Equal(alwaysRunningPid)) + }) + It("should succeed when pid set but initialPid not set", func() { + // Given + Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), []byte(` + {"pid":`+strconv.Itoa(alwaysRunningPid)+`}`), + 0o644)).To(BeNil()) + + // When + err := sut.FromDisk() + + // Then + Expect(err).To(BeNil()) + sutState := sut.State() + Expect(sutState.InitStartTime).NotTo(Equal(0)) + Expect(sutState.InitPid).To(Equal(alwaysRunningPid)) + }) + It("should fail when pid set and not running", func() { + // Given + Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), []byte(` + {"pid":`+strconv.Itoa(neverRunningPid)+`}`), + 0o644)).To(BeNil()) + + // When + err := sut.FromDisk() + + // Then + Expect(err).NotTo(BeNil()) + sutState := sut.State() + Expect(sutState.InitStartTime).To(Equal(0)) + Expect(sutState.InitPid).To(Equal(0)) + }) + + It("should fail to get the state from disk if invalid json", func() { + // Given + Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), + []byte("invalid"), 0o644)).To(BeNil()) + + // When + err := sut.FromDisk() + + // Then + Expect(err).NotTo(BeNil()) + }) + + It("should fail to get the state from disk if not existing", func() { + // Given + // When + err := sut.FromDisk() + + // Then + Expect(err).NotTo(BeNil()) + }) }) - - It("should fail to get the state from disk if invalid json", func() { - // Given - Expect(os.MkdirAll(sut.Dir(), 0o755)).To(BeNil()) - Expect(ioutil.WriteFile(path.Join(sut.Dir(), "state.json"), - []byte("invalid"), 0o644)).To(BeNil()) - defer os.RemoveAll(sut.Dir()) - - // When - err := sut.FromDisk() - - // Then - Expect(err).NotTo(BeNil()) + t.Describe("ShouldBeStopped", func() { + It("should fail to stop if already stopped", func() { + // Given + state := &oci.ContainerState{} + state.Status = oci.ContainerStateStopped + sut.SetState(state) + // When + err := sut.ShouldBeStopped() + + // Then + Expect(err).To(Equal(oci.ErrContainerStopped)) + }) + It("should fail to stop if paused", func() { + // Given + state := &oci.ContainerState{} + state.Status = oci.ContainerStatePaused + sut.SetState(state) + // When + err := sut.ShouldBeStopped() + + // Then + Expect(err).NotTo(Equal(oci.ErrContainerStopped)) + Expect(err).NotTo(BeNil()) + }) + It("should succeed to stop if started", func() { + // Given + state := &oci.ContainerState{} + state.Status = oci.ContainerStateRunning + sut.SetState(state) + // When + err := sut.ShouldBeStopped() + + // Then + Expect(err).To(BeNil()) + }) }) - - It("should fail to get the state from disk if not existing", func() { - // Given - // When - err := sut.FromDisk() - - // Then - Expect(err).NotTo(BeNil()) - }) - It("should fail to stop if already stopped", func() { - // Given - state := &oci.ContainerState{} - state.Status = oci.ContainerStateStopped - sut.SetState(state) - // When - err := sut.ShouldBeStopped() - - // Then - Expect(err).To(Equal(oci.ErrContainerStopped)) + t.Describe("IsAlive", func() { + It("should be false if pid unintialized", func() { + // Given + state := &oci.ContainerState{} + state.Pid = 0 + sut.SetState(state) + // When + err := sut.IsAlive() + + // Then + Expect(err).To(Equal(false)) + }) + It("should succeed if pid is running", func() { + // Given + state := &oci.ContainerState{} + state.Pid = alwaysRunningPid + Expect(state.SetInitPid(state.Pid)).To(BeNil()) + sut.SetState(state) + // When + err := sut.IsAlive() + + // Then + Expect(err).To(Equal(true)) + }) + It("should be false if pid is not running", func() { + // Given + state := &oci.ContainerState{} + state.Pid = neverRunningPid + // SetInitPid will fail because the pid is not running + Expect(state.SetInitPid(state.Pid)).NotTo(BeNil()) + sut.SetState(state) + // When + err := sut.IsAlive() + + // Then + Expect(err).To(Equal(false)) + }) }) - It("should fail to stop if paused", func() { - // Given - state := &oci.ContainerState{} - state.Status = oci.ContainerStatePaused - sut.SetState(state) - // When - err := sut.ShouldBeStopped() - - // Then - Expect(err).NotTo(Equal(oci.ErrContainerStopped)) - Expect(err).NotTo(BeNil()) + t.Describe("Pid", func() { + It("should fail if container state not set", func() { + // Given + // When + pid, err := sut.Pid() + // Then + Expect(pid).To(Equal(0)) + Expect(err).NotTo(BeNil()) + }) + It("should fail when pid is negative", func() { + // Given + state := &oci.ContainerState{} + state.Pid = -1 + // SetInitPid will fail because the pid is not running + Expect(state.SetInitPid(state.Pid)).NotTo(BeNil()) + sut.SetState(state) + + // When + pid, err := sut.Pid() + // Then + Expect(pid).To(Equal(0)) + Expect(err).NotTo(BeNil()) + }) + It("should fail gracefully when pid has been stopped", func() { + // Given + state := &oci.ContainerState{} + state.Pid = alwaysRunningPid + Expect(state.SetInitPid(state.Pid)).To(BeNil()) + // a `runtime state ctr` call after the container has been stopped + // will set the state pid to 0. However, InitPid never changes + // so we have a separate handle for when Pid is 0 but InitPid is not + state.Pid = 0 + sut.SetState(state) + + // When + pid, err := sut.Pid() + // Then + Expect(pid).To(Equal(0)) + Expect(errors.Is(err, oci.ErrNotFound)).To(Equal(true)) + }) + It("should fail if process is not found", func() { + // Given + state := &oci.ContainerState{} + state.Pid = neverRunningPid + Expect(state.SetInitPid(state.Pid)).NotTo(BeNil()) + sut.SetState(state) + + // When + pid, err := sut.Pid() + // Then + Expect(pid).To(Equal(0)) + Expect(errors.Is(err, oci.ErrNotFound)).To(Equal(true)) + }) + It("should fail when pid has wrapped", func() { + // Given + state := &oci.ContainerState{} + state.Pid = alwaysRunningPid + Expect(state.SetInitPid(state.Pid)).To(BeNil()) + // if InitStartTime != the time the state.InitPid started + // pid wrap is assumed to have happened + state.InitStartTime = 0 + sut.SetState(state) + + // When + pid, err := sut.Pid() + // Then + Expect(pid).To(Equal(0)) + Expect(err).NotTo(BeNil()) + }) + It("should succeed", func() { + // Given + state := &oci.ContainerState{} + state.Pid = alwaysRunningPid + Expect(state.SetInitPid(state.Pid)).To(BeNil()) + sut.SetState(state) + + // When + pid, err := sut.Pid() + // Then + Expect(pid).To(Equal(alwaysRunningPid)) + Expect(err).To(BeNil()) + }) }) - It("should succeed to stop if started", func() { - // Given - state := &oci.ContainerState{} - state.Status = oci.ContainerStateRunning - sut.SetState(state) - // When - err := sut.ShouldBeStopped() - - // Then - Expect(err).To(BeNil()) + t.Describe("SetInitPid", func() { + It("should suceeed if running", func() { + // Given + state := &oci.ContainerState{} + // When + state.Pid = alwaysRunningPid + // Then + Expect(state.SetInitPid(state.Pid)).To(BeNil()) + }) + It("should fail if already set", func() { + // Given + state := &oci.ContainerState{} + state.Pid = alwaysRunningPid + // When + Expect(state.SetInitPid(state.Pid)).To(BeNil()) + // Then + Expect(state.SetInitPid(state.Pid)).NotTo(BeNil()) + }) + It("should fail if not running", func() { + // Given + state := &oci.ContainerState{} + // When + state.Pid = neverRunningPid + // Then + Expect(state.SetInitPid(state.Pid)).NotTo(BeNil()) + }) }) }) diff --git a/internal/oci/container_test_inject.go b/internal/oci/container_test_inject.go index 875a15f0c35..63ae5b40192 100644 --- a/internal/oci/container_test_inject.go +++ b/internal/oci/container_test_inject.go @@ -8,3 +8,17 @@ package oci func (c *Container) SetState(state *ContainerState) { c.state = state } + +// SetStateAndSpoofPid sets the container state +// as well as configures the ProcessInformation to succeed +// useful for tests that don't care about pid handling +func (c *Container) SetStateAndSpoofPid(state *ContainerState) { + // we do this hack because most of the tests + // don't care to set a Pid. + // but rely on calling Pid() + if state.Pid == 0 { + state.Pid = 1 + state.SetInitPid(state.Pid) // nolint:errcheck + } + c.state = state +} diff --git a/internal/oci/oci_linux.go b/internal/oci/oci_linux.go index df6a4e687b9..6e744eac594 100644 --- a/internal/oci/oci_linux.go +++ b/internal/oci/oci_linux.go @@ -85,8 +85,12 @@ func (r *runtimeOCI) containerStats(ctr *Container, cgroup string) (*ContainerSt stats.PIDs = cgroupStats.Pids.Current stats.BlockInput, stats.BlockOutput = calculateBlockIO(cgroupStats) - if ctr.state != nil { - netNsPath := fmt.Sprintf("/proc/%d/ns/net", ctr.state.Pid) + // Try our best to get the net namespace path. + // If pid() errors, the container has stopped, and the /proc entry + // won't exist anyway. + pid, _ := ctr.pid() // nolint:errcheck + if pid > 0 { + netNsPath := fmt.Sprintf("/proc/%d/ns/net", pid) stats.NetInput, stats.NetOutput = getContainerNetIO(netNsPath) } diff --git a/internal/oci/runtime_oci.go b/internal/oci/runtime_oci.go index f5d015673eb..05916c81fea 100644 --- a/internal/oci/runtime_oci.go +++ b/internal/oci/runtime_oci.go @@ -16,7 +16,6 @@ import ( "github.com/containernetworking/plugins/pkg/ns" conmonconfig "github.com/containers/conmon/runner/config" "github.com/containers/storage/pkg/pools" - "github.com/cri-o/cri-o/internal/findprocess" "github.com/cri-o/cri-o/internal/log" "github.com/cri-o/cri-o/pkg/config" "github.com/cri-o/cri-o/utils" @@ -205,12 +204,14 @@ func (r *runtimeOCI) CreateContainer(c *Container, cgroupParent string) (retErr close(ch) }() + var pid int select { case ss := <-ch: if ss.err != nil { return fmt.Errorf("error reading container (probably exited) json message: %v", ss.err) } logrus.Debugf("Received container pid: %d", ss.si.Pid) + pid = ss.si.Pid if ss.si.Pid == -1 { if ss.si.Message != "" { logrus.Errorf("Container creation error: %s", ss.si.Message) @@ -224,6 +225,11 @@ func (r *runtimeOCI) CreateContainer(c *Container, cgroupParent string) (retErr return fmt.Errorf("create container timeout") } + // Now we know the container has started, save the pid to verify against future calls. + if err := c.state.SetInitPid(pid); err != nil { + return err + } + return nil } @@ -561,18 +567,15 @@ func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration, close(done) return default: - process, err := findprocess.FindProcess(c.state.Pid) - if err != nil { - if err != findprocess.ErrNotFound { - logrus.Warnf("failed to find process %d for container %v: %v", c.state.Pid, c.id, err) + if err := c.verifyPid(); err != nil { + // The initial container process either doesn't exist, or isn't ours. + if !errors.Is(err, ErrNotFound) { + logrus.Warnf("failed to find process for container %s: %v", c.id, err) } close(done) return } - err = process.Release() - if err != nil { - logrus.Warnf("failed to release process %d for container %v: %v", c.state.Pid, c.id, err) - } + // the PID is still active and belongs to the container, continue to wait time.Sleep(100 * time.Millisecond) } } @@ -589,8 +592,11 @@ func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration, return fmt.Errorf("timeout reached after %.0f seconds waiting for container process to exit", timeout.Seconds()) } - err := kill(c.state.Pid) + pid, err := c.pid() if err != nil { + return err + } + if err := kill(pid); err != nil { return fmt.Errorf("failed to kill process: %v", err) } } @@ -608,30 +614,19 @@ func (r *runtimeOCI) StopContainer(ctx context.Context, c *Container, timeout in return err } - // Check if the process is around before sending a signal - process, err := findprocess.FindProcess(c.state.Pid) - if err == findprocess.ErrNotFound { + // The initial container process either doesn't exist, or isn't ours. + if err := c.verifyPid(); err != nil { c.state.Finished = time.Now() return nil } - if err != nil { - logrus.Warnf("failed to find process %d for container %q: %v", c.state.Pid, c.id, err) - } else { - err = process.Release() - if err != nil { - logrus.Warnf("failed to release process %d for container %q: %v", c.state.Pid, c.id, err) - } - } if timeout > 0 { if _, err := utils.ExecCmd( r.path, rootFlag, r.root, "kill", c.id, c.GetStopSignal(), ); err != nil { - if err := checkProcessGone(c); err != nil { - return fmt.Errorf("failed to stop container %q: %v", c.id, err) - } + checkProcessGone(c) } - err = waitContainerStop(ctx, c, time.Duration(timeout)*time.Second, true) + err := waitContainerStop(ctx, c, time.Duration(timeout)*time.Second, true) if err == nil { return nil } @@ -641,27 +636,18 @@ func (r *runtimeOCI) StopContainer(ctx context.Context, c *Container, timeout in if _, err := utils.ExecCmd( r.path, rootFlag, r.root, "kill", c.id, "KILL", ); err != nil { - if err := checkProcessGone(c); err != nil { - return fmt.Errorf("failed to stop container %v: %v", c.id, err) - } + checkProcessGone(c) } return waitContainerStop(ctx, c, killContainerTimeout, false) } -func checkProcessGone(c *Container) error { - process, perr := findprocess.FindProcess(c.state.Pid) - if perr == findprocess.ErrNotFound { +func checkProcessGone(c *Container) { + if err := c.verifyPid(); err != nil { + // The initial container process either doesn't exist, or isn't ours. + // Set state accordingly. c.state.Finished = time.Now() - return nil - } - if perr == nil { - err := process.Release() - if err != nil { - logrus.Warnf("failed to release process %d for container %v: %v", c.state.Pid, c.id, err) - } } - return fmt.Errorf("failed to find process: %v", perr) } // DeleteContainer deletes a container. @@ -712,7 +698,6 @@ func (r *runtimeOCI) UpdateContainerStatus(c *Container) error { } out, err := cmd.Output() if err != nil { - logrus.Errorf("Failed to update container state for %s: %v", c.id, err) // there are many code paths that could lead to have a bad state in the // underlying runtime. // On any error like a container went away or we rebooted and containers diff --git a/server/container_create_linux.go b/server/container_create_linux.go index 6871208c432..e81e99a4a56 100644 --- a/server/container_create_linux.go +++ b/server/container_create_linux.go @@ -527,14 +527,11 @@ func (s *Server) createSandboxContainer(ctx context.Context, ctr ctrIface.Contai return nil, err } } else if containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetPid() == pb.NamespaceMode_POD { - infra := sb.InfraContainer() - if infra == nil { - return nil, errors.New("PID namespace requested, but sandbox has no infra container") + pidNsPath := sb.PidNsPath() + if pidNsPath == "" { + return nil, errors.New("PID namespace requested, but sandbox infra container invalid") } - // share Pod PID namespace - // SEE NOTE ABOVE - pidNsPath := fmt.Sprintf("/proc/%d/ns/pid", infra.State().Pid) if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.PIDNamespace), pidNsPath); err != nil { return nil, err } diff --git a/server/container_execsync.go b/server/container_execsync.go index 4928daf50ae..21071f8855b 100644 --- a/server/container_execsync.go +++ b/server/container_execsync.go @@ -3,7 +3,6 @@ package server import ( "fmt" - oci "github.com/cri-o/cri-o/internal/oci" "golang.org/x/net/context" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -17,12 +16,7 @@ func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (*pb.Exe return nil, status.Errorf(codes.NotFound, "could not find container %q: %v", req.ContainerId, err) } - if err := s.Runtime().UpdateContainerStatus(c); err != nil { - return nil, err - } - - cState := c.State() - if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { + if !c.IsAlive() { return nil, fmt.Errorf("container is not created or running") } diff --git a/server/container_status_test.go b/server/container_status_test.go index d5c8118b684..e1ef0935cbe 100644 --- a/server/container_status_test.go +++ b/server/container_status_test.go @@ -32,7 +32,7 @@ var _ = t.Describe("ContainerStatus", func() { // Given addContainerAndSandbox() testContainer.AddVolume(oci.ContainerVolume{}) - testContainer.SetState(givenState) + testContainer.SetStateAndSpoofPid(givenState) testContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) gomock.InOrder( diff --git a/server/inspect.go b/server/inspect.go index 4d14ef39d27..e71b214c476 100644 --- a/server/inspect.go +++ b/server/inspect.go @@ -1,7 +1,6 @@ package server import ( - "errors" "fmt" "math" "net/http" @@ -12,6 +11,7 @@ import ( "github.com/cri-o/cri-o/pkg/types" "github.com/go-zoo/bone" json "github.com/json-iterator/go" + "github.com/pkg/errors" "github.com/sirupsen/logrus" ) diff --git a/server/inspect_ginkgo_test.go b/server/inspect_ginkgo_test.go index c45421c269d..f3336119d73 100644 --- a/server/inspect_ginkgo_test.go +++ b/server/inspect_ginkgo_test.go @@ -4,6 +4,7 @@ import ( "net/http" "net/http/httptest" + "github.com/cri-o/cri-o/internal/oci" "github.com/cri-o/cri-o/internal/storage" "github.com/go-zoo/bone" "github.com/golang/mock/gomock" @@ -45,6 +46,7 @@ var _ = t.Describe("Inspect", func() { It("should succeed with valid /containers route", func() { // Given Expect(sut.AddSandbox(testSandbox)).To(BeNil()) + testContainer.SetStateAndSpoofPid(&oci.ContainerState{}) Expect(testSandbox.SetInfraContainer(testContainer)).To(BeNil()) sut.AddContainer(testContainer) gomock.InOrder( @@ -66,6 +68,7 @@ var _ = t.Describe("Inspect", func() { It("should fail if sandbox not found on /containers route", func() { // Given Expect(sut.AddSandbox(testSandbox)).To(BeNil()) + testContainer.SetStateAndSpoofPid(&oci.ContainerState{}) Expect(testSandbox.SetInfraContainer(testContainer)).To(BeNil()) sut.AddContainer(testContainer) Expect(sut.RemoveSandbox(testSandbox.ID())).To(BeNil()) diff --git a/server/sandbox_status_test.go b/server/sandbox_status_test.go index 2b13be0317c..617797d6809 100644 --- a/server/sandbox_status_test.go +++ b/server/sandbox_status_test.go @@ -24,7 +24,7 @@ var _ = t.Describe("PodSandboxStatus", func() { It("should succeed", func() { // Given addContainerAndSandbox() - testContainer.SetState(&oci.ContainerState{ + testContainer.SetStateAndSpoofPid(&oci.ContainerState{ State: specs.State{Status: oci.ContainerStateRunning}, }) @@ -75,7 +75,7 @@ var _ = t.Describe("PodSandboxStatus", func() { It("should return info as part of a verbose response", func() { // Given addContainerAndSandbox() - testContainer.SetState(&oci.ContainerState{ + testContainer.SetStateAndSpoofPid(&oci.ContainerState{ State: specs.State{Status: oci.ContainerStateRunning}, }) testContainer.SetSpec(&specs.Spec{Version: "1.0.0"})