diff --git a/internal/oci/container.go b/internal/oci/container.go index 0d3797e303f..679e4732265 100644 --- a/internal/oci/container.go +++ b/internal/oci/container.go @@ -70,6 +70,7 @@ type Container struct { stopping bool stopTimeoutChan chan time.Duration stoppedChan chan struct{} + stopStoppingChan chan struct{} stopLock sync.Mutex } @@ -113,20 +114,21 @@ func NewContainer(id, name, bundlePath, logPath string, labels, crioAnnotations, }, ImageRef: imageRef, }, - name: name, - bundlePath: bundlePath, - logPath: logPath, - terminal: terminal, - stdin: stdin, - stdinOnce: stdinOnce, - runtimeHandler: runtimeHandler, - crioAnnotations: crioAnnotations, - imageName: imageName, - dir: dir, - state: state, - stopSignal: stopSignal, - stopTimeoutChan: make(chan time.Duration, 1), - stoppedChan: make(chan struct{}, 1), + name: name, + bundlePath: bundlePath, + logPath: logPath, + terminal: terminal, + stdin: stdin, + stdinOnce: stdinOnce, + runtimeHandler: runtimeHandler, + crioAnnotations: crioAnnotations, + imageName: imageName, + dir: dir, + state: state, + stopSignal: stopSignal, + stopTimeoutChan: make(chan time.Duration, 1), + stoppedChan: make(chan struct{}, 1), + stopStoppingChan: make(chan struct{}, 1), } return c, nil } @@ -561,11 +563,14 @@ func (c *Container) SetAsStopping(timeout int64) { select { case c.stopTimeoutChan <- time.Duration(timeout) * time.Second: case <-c.stoppedChan: // This case is to avoid waiting forever once another routine has finished. - return + case <-c.stopStoppingChan: // This case is to avoid deadlocking with SetAsNotStopping. } + return } // Regardless, set the container as actively stopping. c.stopping = true + // And reset the stopStoppingChan + c.stopStoppingChan = make(chan struct{}, 1) } // SetAsNotStopping unsets the stopping field indicating to new callers that the container diff --git a/internal/oci/oci.go b/internal/oci/oci.go index 638640c8eb1..dbf63cb95d6 100644 --- a/internal/oci/oci.go +++ b/internal/oci/oci.go @@ -126,8 +126,9 @@ func (r *Runtime) WaitContainerStateStopped(ctx context.Context, c *Container) e return nil } - done := make(chan error) - chControl := make(chan struct{}) + done := make(chan error, 1) + chControl := make(chan struct{}, 1) + defer close(chControl) go func() { defer close(done) for { @@ -152,10 +153,8 @@ func (r *Runtime) WaitContainerStateStopped(ctx context.Context, c *Container) e case err = <-done: break case <-ctx.Done(): - close(chControl) return ctx.Err() case <-time.After(time.Duration(r.config.CtrStopTimeout) * time.Second): - close(chControl) return fmt.Errorf( "failed to get container stopped status: %ds timeout reached", r.config.CtrStopTimeout, diff --git a/internal/oci/runtime_oci.go b/internal/oci/runtime_oci.go index 639c9b173ca..4d46ecf6c3a 100644 --- a/internal/oci/runtime_oci.go +++ b/internal/oci/runtime_oci.go @@ -692,6 +692,9 @@ func (r *runtimeOCI) StopContainer(ctx context.Context, c *Container, timeout in // Otherwise, we won't actually // attempt to stop when a new request comes in, // even though we're not actively stopping anymore. + // Also, close the stopStoppingChan to tell + // routines waiting to change the stop timeout to give up. + close(c.stopStoppingChan) c.SetAsNotStopping() } }()